gix-filter-0.11.1/.cargo_vcs_info.json0000644000000001500000000000100131620ustar { "git": { "sha1": "095c6739b2722a8b9af90776b435ef2da454c0e6" }, "path_in_vcs": "gix-filter" }gix-filter-0.11.1/Cargo.lock0000644000000410750000000000100111500ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "bstr" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" dependencies = [ "memchr", "regex-automata", "serde", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "dashmap" version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", "hashbrown", "lock_api", "once_cell", "parking_lot_core", ] [[package]] name = "deranged" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" dependencies = [ "powerfmt", ] [[package]] name = "encoding_rs" version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ "cfg-if", ] [[package]] name = "faster-hex" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2a2b11eda1d40935b26cf18f6833c526845ae8c41e58d09af6adeb6f0269183" [[package]] name = "fastrand" version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "gix-actor" version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45c3a3bde455ad2ee8ba8a195745241ce0b770a8a26faae59fcf409d01b28c46" dependencies = [ "bstr", "gix-date", "gix-utils", "itoa", "thiserror", "winnow", ] [[package]] name = "gix-attributes" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eefb48f42eac136a4a0023f49a54ec31be1c7a9589ed762c45dcb9b953f7ecc8" dependencies = [ "bstr", "gix-glob", "gix-path", "gix-quote", "gix-trace", "kstring", "smallvec", "thiserror", "unicode-bom", ] [[package]] name = "gix-command" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90009020dc4b3de47beed28e1334706e0a330ddd17f5cfeb097df3b15a54b77" dependencies = [ "bstr", "gix-path", "gix-trace", "shell-words", ] [[package]] name = "gix-date" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180b130a4a41870edfbd36ce4169c7090bca70e195da783dea088dd973daa59c" dependencies = [ "bstr", "itoa", "thiserror", "time", ] [[package]] name = "gix-features" version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4254037d20a247a0367aa79333750146a369719f0c6617fec4f5752cc62b37" dependencies = [ "gix-hash", "gix-trace", "libc", "prodash", "sha1_smol", ] [[package]] name = "gix-filter" version = "0.11.1" dependencies = [ "bstr", "encoding_rs", "gix-attributes", "gix-command", "gix-hash", 
"gix-object", "gix-packetline-blocking", "gix-path", "gix-quote", "gix-trace", "gix-utils", "serial_test", "smallvec", "thiserror", ] [[package]] name = "gix-glob" version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "682bdc43cb3c00dbedfcc366de2a849b582efd8d886215dbad2ea662ec156bb5" dependencies = [ "bitflags 2.4.1", "bstr", "gix-features", "gix-path", ] [[package]] name = "gix-hash" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93d7df7366121b5018f947a04d37f034717e113dcf9ccd85c34b58e57a74d5e" dependencies = [ "faster-hex", "thiserror", ] [[package]] name = "gix-object" version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d4f8efae72030df1c4a81d02dbe2348e748d9b9a11e108ed6efbd846326e051" dependencies = [ "bstr", "gix-actor", "gix-date", "gix-features", "gix-hash", "gix-utils", "gix-validate", "itoa", "smallvec", "thiserror", "winnow", ] [[package]] name = "gix-packetline-blocking" version = "0.17.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c31d42378a3d284732e4d589979930d0d253360eccf7ec7a80332e5ccb77e14a" dependencies = [ "bstr", "faster-hex", "gix-trace", "thiserror", ] [[package]] name = "gix-path" version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23623cf0f475691a6d943f898c4d0b89f5c1a2a64d0f92bce0e0322ee6528783" dependencies = [ "bstr", "gix-trace", "home", "once_cell", "thiserror", ] [[package]] name = "gix-quote" version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbff4f9b9ea3fa7a25a70ee62f545143abef624ac6aa5884344e70c8b0a1d9ff" dependencies = [ "bstr", "gix-utils", "thiserror", ] [[package]] name = "gix-trace" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f924267408915fddcd558e3f37295cc7d6a3e50f8bd8b606cee0808c3915157e" [[package]] name = "gix-utils" version 
= "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35192df7fd0fa112263bad8021e2df7167df4cc2a6e6d15892e1e55621d3d4dc" dependencies = [ "fastrand", "unicode-normalization", ] [[package]] name = "gix-validate" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e39fc6e06044985eac19dd34d474909e517307582e462b2eb4c8fa51b6241545" dependencies = [ "bstr", "thiserror", ] [[package]] name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" [[package]] name = "home" version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" dependencies = [ "windows-sys", ] [[package]] name = "itoa" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "kstring" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" dependencies = [ "static_assertions", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "lock_api" version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", ] [[package]] name = "memchr" version = "2.7.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "num_threads" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" dependencies = [ "libc", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "parking_lot" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", "windows-targets", ] [[package]] name = "powerfmt" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "proc-macro2" version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" dependencies = [ "unicode-ident", ] [[package]] name = "prodash" version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "744a264d26b88a6a7e37cbad97953fa233b94d585236310bcbc88474b4092d79" [[package]] name = "quote" version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" version = "0.4.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "regex-automata" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serial_test" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e56dd856803e253c8f298af3f4d7eb0ae5e23a737252cd90bb4f3b435033b2d" dependencies = [ "dashmap", "lazy_static", "parking_lot", "serial_test_derive", ] [[package]] name = "serial_test_derive" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "sha1_smol" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" [[package]] name = "shell-words" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "smallvec" version = "1.13.2" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" version = "2.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "thiserror" version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "time" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" dependencies = [ "deranged", "itoa", "libc", "num_threads", "powerfmt", "serde", "time-core", "time-macros", ] [[package]] name = "time-core" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" dependencies = [ "time-core", ] [[package]] name = "tinyvec" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" dependencies = [ 
"tinyvec_macros", ] [[package]] name = "tinyvec_macros" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "unicode-bom" version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7eec5d1121208364f6793f7d2e222bf75a915c19557537745b195b253dd64217" [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" dependencies = [ "tinyvec", ] [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "winnow" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b1dbce9e90e5404c5a52ed82b1d13fc8cfbdad85033b6f57546ffd1265f8451" dependencies = [ "memchr", ] gix-filter-0.11.1/Cargo.toml0000644000000030650000000000100111700ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" rust-version = "1.65" name = "gix-filter" version = "0.11.1" authors = ["Sebastian Thiel "] include = [ "src/**/*", "LICENSE-*", ] description = "A crate of the gitoxide project implementing git filters" license = "MIT OR Apache-2.0" repository = "https://github.com/Byron/gitoxide" [lib] doctest = false [dependencies.bstr] version = "1.5.0" features = ["std"] default-features = false [dependencies.encoding_rs] version = "0.8.32" [dependencies.gix-attributes] version = "^0.22.2" [dependencies.gix-command] version = "^0.3.6" [dependencies.gix-hash] version = "^0.14.2" [dependencies.gix-object] version = "^0.42.0" [dependencies.gix-packetline] version = "^0.17.4" package = "gix-packetline-blocking" [dependencies.gix-path] version = "^0.10.7" [dependencies.gix-quote] version = "^0.4.12" [dependencies.gix-trace] version = "^0.1.9" [dependencies.gix-utils] version = "^0.1.12" [dependencies.smallvec] version = "1.10.0" [dependencies.thiserror] version = "1.0.38" [dev-dependencies.serial_test] version = "2.0.0" default-features = false gix-filter-0.11.1/Cargo.toml.orig000064400000000000000000000024671046102023000146560ustar 00000000000000[package] name = "gix-filter" version = "0.11.1" repository = "https://github.com/Byron/gitoxide" license = "MIT OR Apache-2.0" description = "A crate of the gitoxide project implementing git filters" authors = ["Sebastian Thiel "] edition = "2021" rust-version = "1.65" include = ["src/**/*", "LICENSE-*"] [lib] doctest = false [dependencies] gix-hash = { version = "^0.14.2", path = "../gix-hash" } gix-trace = { version = "^0.1.9", path = "../gix-trace" } gix-object = { version = "^0.42.0", path = "../gix-object" } gix-command = { version = "^0.3.6", path = "../gix-command" } gix-quote = { version = "^0.4.12", path = "../gix-quote" } gix-utils = { version = "^0.1.12", path = "../gix-utils" } gix-path = { version = "^0.10.7", path = "../gix-path" } gix-packetline = { package = "gix-packetline-blocking", version = 
"^0.17.4", path = "../gix-packetline-blocking" } gix-attributes = { version = "^0.22.2", path = "../gix-attributes" } encoding_rs = "0.8.32" bstr = { version = "1.5.0", default-features = false, features = ["std"] } thiserror = "1.0.38" smallvec = "1.10.0" [dev-dependencies] serial_test = { version = "2.0.0", default-features = false } gix-testtools = { path = "../tests/tools" } gix-worktree = { path = "../gix-worktree", default-features = false, features = ["attributes"] } gix-filter-0.11.1/LICENSE-APACHE000064400000000000000000000247461046102023000137170ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-filter-0.11.1/LICENSE-MIT000064400000000000000000000017771046102023000134260ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-filter-0.11.1/src/driver/apply.rs000064400000000000000000000244731046102023000155450ustar 00000000000000use std::collections::HashMap; use bstr::{BStr, BString}; use crate::{ driver, driver::{process, process::client::invoke, Operation, Process, State}, Driver, }; /// What to do if delay is supported by a process filter. #[derive(Default, Debug, Copy, Clone)] pub enum Delay { /// Use delayed processing for this entry. /// /// Note that it's up to the filter to determine whether or not the processing should be delayed. #[default] Allow, /// Do not delay the processing, and force it to happen immediately. In this case, no delayed processing will occur /// even if the filter supports it. /// /// This is the default as it requires no special precautions to be taken by the caller as /// outputs will be produced immediately. 
Forbid, } /// The error returned by [State::apply()][super::State::apply()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Init(#[from] driver::init::Error), #[error("Could not write entire object to driver")] WriteSource(#[from] std::io::Error), #[error("Filter process delayed an entry even though that was not requested")] DelayNotAllowed, #[error("Failed to invoke '{command}' command")] ProcessInvoke { source: process::client::invoke::Error, command: String, }, #[error("The invoked command '{command}' in process indicated an error: {status:?}")] ProcessStatus { status: driver::process::Status, command: String, }, } /// Additional information for use in the [`State::apply()`] method. #[derive(Debug, Copy, Clone)] pub struct Context<'a, 'b> { /// The repo-relative using slashes as separator of the entry currently being processed. pub rela_path: &'a BStr, /// The name of the reference that `HEAD` is pointing to. It's passed to `process` filters if present. pub ref_name: Option<&'b BStr>, /// The root-level tree that contains the current entry directly or indirectly, or the commit owning the tree (if available). /// /// This is passed to `process` filters if present. pub treeish: Option, /// The actual blob-hash of the data we are processing. It's passed to `process` filters if present. /// /// Note that this hash might be different from the `$Id$` of the respective `ident` filter, as the latter generates the hash itself. pub blob: Option, } /// Apply operations to filter programs. impl State { /// Apply `operation` of `driver` to the bytes read from `src` and return a reader to immediately consume the output /// produced by the filter. `rela_path` is the repo-relative path of the entry to handle. /// It's possible that the filter stays inactive, in which case the `src` isn't consumed and has to be used by the caller. 
/// /// Each call to this method will cause the corresponding filter to be invoked unless `driver` indicates a `process` filter, /// which is only launched once and maintained using this state. /// /// Note that it's not an error if there is no filter process for `operation` or if a long-running process doesn't supported /// the desired capability. /// /// ### Deviation /// /// If a long running process returns the 'abort' status after receiving the data, it will be removed similar to how `git` does it. /// However, it delivers an unsuccessful error status later, it will not be removed, but reports the error only. /// If any other non-'error' status is received, the process will be stopped. But that doesn't happen if if such a status is received /// after reading the filtered result. pub fn apply<'a>( &'a mut self, driver: &Driver, src: &mut impl std::io::Read, operation: Operation, ctx: Context<'_, '_>, ) -> Result>, Error> { match self.apply_delayed(driver, src, operation, Delay::Forbid, ctx)? { Some(MaybeDelayed::Delayed(_)) => { unreachable!("we forbid delaying the entry") } Some(MaybeDelayed::Immediate(read)) => Ok(Some(read)), None => Ok(None), } } /// Like [`apply()]`[Self::apply()], but use `delay` to determine if the filter result may be delayed or not. /// /// Poll [`list_delayed_paths()`][Self::list_delayed_paths()] until it is empty and query the available paths again. /// Note that even though it's possible, the API assumes that commands aren't mixed when delays are allowed. pub fn apply_delayed<'a>( &'a mut self, driver: &Driver, src: &mut impl std::io::Read, operation: Operation, delay: Delay, ctx: Context<'_, '_>, ) -> Result>, Error> { match self.maybe_launch_process(driver, operation, ctx.rela_path)? 
{ Some(Process::SingleFile { mut child, command }) => { std::io::copy(src, &mut child.stdin.take().expect("configured"))?; Ok(Some(MaybeDelayed::Immediate(Box::new(ReadFilterOutput { inner: child.stdout.take(), child: driver.required.then_some((child, command)), })))) } Some(Process::MultiFile { client, key }) => { let command = operation.as_str(); if !client.capabilities().contains(command) { return Ok(None); } let invoke_result = client.invoke( command, &mut [ ("pathname", Some(ctx.rela_path.to_owned())), ("ref", ctx.ref_name.map(ToOwned::to_owned)), ("treeish", ctx.treeish.map(|id| id.to_hex().to_string().into())), ("blob", ctx.blob.map(|id| id.to_hex().to_string().into())), ( "can-delay", match delay { Delay::Allow if client.capabilities().contains("delay") => Some("1".into()), Delay::Forbid | Delay::Allow => None, }, ), ] .into_iter() .filter_map(|(key, value)| value.map(|v| (key, v))), src, ); let status = match invoke_result { Ok(status) => status, Err(err) => { let invoke::Error::Io(io_err) = &err; handle_io_err(io_err, &mut self.running, key.0.as_ref()); return Err(Error::ProcessInvoke { command: command.into(), source: err, }); } }; if status.is_delayed() { if matches!(delay, Delay::Forbid) { return Err(Error::DelayNotAllowed); } Ok(Some(MaybeDelayed::Delayed(key))) } else if status.is_success() { // TODO: find a way to not have to do the 'borrow-dance'. 
let client = self.running.remove(&key.0).expect("present for borrowcheck dance"); self.running.insert(key.0.clone(), client); let client = self.running.get_mut(&key.0).expect("just inserted"); Ok(Some(MaybeDelayed::Immediate(Box::new(client.as_read())))) } else { let message = status.message().unwrap_or_default(); match message { "abort" => { client.capabilities_mut().remove(command); } "error" => {} _strange => { let client = self.running.remove(&key.0).expect("we definitely have it"); client.into_child().kill().ok(); } } Err(Error::ProcessStatus { command: command.into(), status, }) } } None => Ok(None), } } } /// A type to represent delayed or immediate apply-filter results. pub enum MaybeDelayed<'a> { /// Using the delayed protocol, this entry has been sent to a long-running process and needs to be /// checked for again, later, using the [`driver::Key`] to refer to the filter who owes a response. /// /// Note that the path to the entry is also needed to obtain the filtered result later. Delayed(driver::Key), /// The filtered result can be read from the contained reader right away. /// /// Note that it must be consumed in full or till a read error occurs. Immediate(Box), } /// A utility type to facilitate streaming the output of a filter process. struct ReadFilterOutput { inner: Option, /// The child is present if we need its exit code to be positive. 
child: Option<(std::process::Child, std::process::Command)>, } pub(crate) fn handle_io_err(err: &std::io::Error, running: &mut HashMap, process: &BStr) { if matches!( err.kind(), std::io::ErrorKind::BrokenPipe | std::io::ErrorKind::UnexpectedEof ) { running.remove(process).expect("present or we wouldn't be here"); } } impl std::io::Read for ReadFilterOutput { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { match self.inner.as_mut() { Some(inner) => { let num_read = inner.read(buf)?; if num_read == 0 { self.inner.take(); if let Some((mut child, cmd)) = self.child.take() { let status = child.wait()?; if !status.success() { return Err(std::io::Error::new( std::io::ErrorKind::Other, format!("Driver process {cmd:?} failed"), )); } } } Ok(num_read) } None => Ok(0), } } } gix-filter-0.11.1/src/driver/delayed.rs000064400000000000000000000137611046102023000160250ustar 00000000000000use bstr::{BStr, BString}; use crate::{ driver, driver::{apply::handle_io_err, Operation, State}, }; /// #[allow(clippy::empty_docs)] pub mod list { use crate::driver; /// The error returned by [State::list_delayed_paths()][super::State::list_delayed_paths()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Could not get process named '{}' which should be running and tracked", wanted.0)] ProcessMissing { wanted: driver::Key }, #[error("Failed to run 'list_available_blobs' command")] ProcessInvoke(#[from] driver::process::client::invoke::without_content::Error), #[error("The invoked command 'list_available_blobs' in process indicated an error: {status:?}")] ProcessStatus { status: driver::process::Status }, } } /// #[allow(clippy::empty_docs)] pub mod fetch { use crate::driver; /// The error returned by [State::fetch_delayed()][super::State::fetch_delayed()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Could not get process named '{}' which should be running and tracked", wanted.0)] ProcessMissing { wanted: driver::Key }, #[error("Failed to run '{command}' command")] ProcessInvoke { command: String, source: driver::process::client::invoke::Error, }, #[error("The invoked command '{command}' in process indicated an error: {status:?}")] ProcessStatus { status: driver::process::Status, command: String, }, } } /// Operations related to delayed filtering. impl State { /// Return a list of delayed paths for `process` that can then be obtained with [`fetch_delayed()`][Self::fetch_delayed()]. /// /// A process abiding the protocol will eventually list all previously delayed paths for any invoked command, or /// signals that it is done with all delayed paths by returning an empty list. /// It's up to the caller to validate these assumptions. /// /// ### Error Handling /// /// Usually if the process sends the "abort" status, we will not use a certain capability again. Here it's unclear what capability /// that is and what to do, so we leave the process running and do nothing else (just like `git`). 
pub fn list_delayed_paths(&mut self, process: &driver::Key) -> Result, list::Error> { let client = self .running .get_mut(&process.0) .ok_or_else(|| list::Error::ProcessMissing { wanted: process.clone(), })?; let mut out = Vec::new(); let result = client.invoke_without_content("list_available_blobs", &mut None.into_iter(), &mut |line| { if let Some(path) = line.strip_prefix(b"pathname=") { out.push(path.into()) } }); let status = match result { Ok(res) => res, Err(err) => { if let driver::process::client::invoke::without_content::Error::Io(err) = &err { handle_io_err(err, &mut self.running, process.0.as_ref()); } return Err(err.into()); } }; if status.is_success() { Ok(out) } else { let message = status.message().unwrap_or_default(); match message { "error" | "abort" => {} _strange => { let client = self.running.remove(&process.0).expect("we definitely have it"); client.into_child().kill().ok(); } } Err(list::Error::ProcessStatus { status }) } } /// Given a `process` and a `path` (as previously returned by [list_delayed_paths()][Self::list_delayed_paths()]), return /// a reader to stream the filtered result. Note that `operation` must match the original operation that produced the delayed result /// or the long-running process might not know the path, depending on its implementation. 
pub fn fetch_delayed( &mut self, process: &driver::Key, path: &BStr, operation: Operation, ) -> Result { let client = self .running .get_mut(&process.0) .ok_or_else(|| fetch::Error::ProcessMissing { wanted: process.clone(), })?; let result = client.invoke( operation.as_str(), &mut [("pathname", path.to_owned())].into_iter(), &mut &b""[..], ); let status = match result { Ok(status) => status, Err(err) => { let driver::process::client::invoke::Error::Io(io_err) = &err; handle_io_err(io_err, &mut self.running, process.0.as_ref()); return Err(fetch::Error::ProcessInvoke { command: operation.as_str().into(), source: err, }); } }; if status.is_success() { // TODO: find a way to not have to do the 'borrow-dance'. let client = self.running.remove(&process.0).expect("present for borrowcheck dance"); self.running.insert(process.0.clone(), client); let client = self.running.get_mut(&process.0).expect("just inserted"); Ok(client.as_read()) } else { let message = status.message().unwrap_or_default(); match message { "abort" => { client.capabilities_mut().remove(operation.as_str()); } "error" => {} _strange => { let client = self.running.remove(&process.0).expect("we definitely have it"); client.into_child().kill().ok(); } } Err(fetch::Error::ProcessStatus { command: operation.as_str().into(), status, }) } } } gix-filter-0.11.1/src/driver/init.rs000064400000000000000000000076461046102023000153660ustar 00000000000000use std::process::Stdio; use bstr::{BStr, BString}; use crate::{ driver, driver::{process, substitute_f_parameter, Operation, Process, State}, Driver, }; /// The error returned by [State::maybe_launch_process()][super::State::maybe_launch_process()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to spawn driver: {command:?}")] SpawnCommand { source: std::io::Error, command: std::process::Command, }, #[error("Process handshake with command {command:?} failed")] ProcessHandshake { source: process::client::handshake::Error, command: std::process::Command, }, } /// Lifecycle impl State { /// Obtain a process as defined in `driver` suitable for a given `operation. `rela_path` may be used to substitute the current /// file for use in the invoked `SingleFile` process. /// /// Note that if a long-running process is defined, the `operation` isn't relevant and capabilities are to be checked by the caller. pub fn maybe_launch_process( &mut self, driver: &Driver, operation: Operation, rela_path: &BStr, ) -> Result>, Error> { match driver.process.as_ref() { Some(process) => { let client = match self.running.remove(process) { Some(c) => c, None => { let (child, cmd) = spawn_driver(process.clone(), &self.context)?; process::Client::handshake(child, "git-filter", &[2], &["clean", "smudge", "delay"]).map_err( |err| Error::ProcessHandshake { source: err, command: cmd, }, )? } }; // TODO: find a way to not have to do this 'borrow-dance'. // this strangeness is to workaround the borrowchecker, who otherwise won't let us return a reader. Quite sad :/. // One would want to `get_mut()` or insert essentially, but it won't work. 
self.running.insert(process.clone(), client); let client = self.running.get_mut(process).expect("just inserted"); Ok(Some(Process::MultiFile { client, key: driver::Key(process.to_owned()), })) } None => { let cmd = match operation { Operation::Clean => driver .clean .as_ref() .map(|cmd| substitute_f_parameter(cmd.as_ref(), rela_path)), Operation::Smudge => driver .smudge .as_ref() .map(|cmd| substitute_f_parameter(cmd.as_ref(), rela_path)), }; let cmd = match cmd { Some(cmd) => cmd, None => return Ok(None), }; let (child, command) = spawn_driver(cmd, &self.context)?; Ok(Some(Process::SingleFile { child, command })) } } } } fn spawn_driver( cmd: BString, context: &gix_command::Context, ) -> Result<(std::process::Child, std::process::Command), Error> { let mut cmd: std::process::Command = gix_command::prepare(gix_path::from_bstr(cmd).into_owned()) .with_shell() .with_context(context.clone()) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) .into(); gix_trace::debug!(cmd = ?cmd, "launching filter driver"); let child = match cmd.spawn() { Ok(child) => child, Err(err) => { return Err(Error::SpawnCommand { source: err, command: cmd, }) } }; Ok((child, cmd)) } gix-filter-0.11.1/src/driver/mod.rs000064400000000000000000000071521046102023000151720ustar 00000000000000use std::collections::HashMap; use bstr::{BStr, BString, ByteSlice, ByteVec}; /// #[allow(clippy::empty_docs)] pub mod init; /// #[allow(clippy::empty_docs)] pub mod apply; /// #[allow(clippy::empty_docs)] pub mod shutdown; /// #[allow(clippy::empty_docs)] pub mod delayed; /// #[allow(clippy::empty_docs)] pub mod process; /// A literal driver process. pub enum Process<'a> { /// A spawned processes to handle a single file SingleFile { /// The child to use as handle for sending and receiving data. 
child: std::process::Child, /// The launched command that produced the `child` in the first place command: std::process::Command, }, /// A multi-file process which is launched once to handle one or more files by using a custom IO protocol. MultiFile { /// A handle to interact with the long-running process. client: &'a mut process::Client, /// A way to refer to the `client` later if needed. key: Key, }, } /// The kind of operation to apply using a driver #[derive(Debug, Copy, Clone)] pub enum Operation { /// Turn worktree content into content suitable for storage in `git`. Clean, /// Turn content stored in `git` to content suitable for the working tree. Smudge, } impl Operation { /// Return a string that identifies the operation. This happens to be the command-names used in long-running processes as well. pub fn as_str(&self) -> &'static str { match self { Operation::Clean => "clean", Operation::Smudge => "smudge", } } } /// State required to handle `process` filters, which are running until all their work is done. /// /// These can be significantly faster on some platforms as they are launched only once, while supporting asynchronous processing. /// /// ### Lifecycle /// /// Note that [`shutdown()`][State::shutdown()] must be called to finalize long-running processes. /// Failing to do so will naturally shut them down by terminating their pipes, but finishing explicitly /// allows to wait for processes as well. #[derive(Default)] pub struct State { /// The list of currently running processes. These are preferred over simple clean-and-smudge programs. /// /// Note that these processes are expected to shut-down once their stdin/stdout are dropped, so nothing else /// needs to be done to clean them up after drop. running: HashMap, /// The context to pass to spawned filter programs. pub context: gix_command::Context, } /// Initialization impl State { /// Create a new instance using `context` to inform launched processes about their environment. 
pub fn new(context: gix_command::Context) -> Self { Self { running: Default::default(), context, } } } impl Clone for State { fn clone(&self) -> Self { State { running: Default::default(), context: self.context.clone(), } } } /// A way to reference a running multi-file filter process for later acquisition of delayed output. #[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct Key(BString); /// Substitute `path` as shell-save version into `cmd` which could be something like `cmd something %f`. fn substitute_f_parameter(cmd: &BStr, path: &BStr) -> BString { let mut buf: BString = Vec::with_capacity(cmd.len()).into(); let mut ofs = 0; while let Some(pos) = cmd[ofs..].find(b"%f") { buf.push_str(&cmd[..ofs + pos]); buf.extend_from_slice(&gix_quote::single(path)); ofs += pos + 2; } buf.push_str(&cmd[ofs..]); buf } gix-filter-0.11.1/src/driver/process/client.rs000064400000000000000000000252151046102023000173470ustar 00000000000000use std::{collections::HashSet, io::Write, str::FromStr}; use bstr::{BStr, BString, ByteVec}; use crate::driver::{ process, process::{Capabilities, Client, PacketlineReader}, }; /// #[allow(clippy::empty_docs)] pub mod handshake { /// The error returned by [Client::handshake()][super::Client::handshake()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to read or write to the process")] Io(#[from] std::io::Error), #[error("{msg} '{actual}'")] Protocol { msg: String, actual: String }, #[error("The server sent the '{name}' capability which isn't among the ones we desire can support")] UnsupportedCapability { name: String }, } } /// #[allow(clippy::empty_docs)] pub mod invoke { /// The error returned by [Client::invoke()][super::Client::invoke()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to read or write to the process")] Io(#[from] std::io::Error), } /// #[allow(clippy::empty_docs)] pub mod without_content { /// The error returned by [Client::invoke_without_content()][super::super::Client::invoke_without_content()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to read or write to the process")] Io(#[from] std::io::Error), #[error(transparent)] PacketlineDecode(#[from] gix_packetline::decode::Error), } impl From for Error { fn from(value: super::Error) -> Self { match value { super::Error::Io(err) => Error::Io(err), } } } } } /// Protocol implementation impl Client { /// Given a spawned `process` as created from `cmd`, use the 'long-running-process' protocol to send `welcome-prefix` and supported /// `versions`, along with the `desired_capabilities`, and perform the handshake to negotiate a version to use along with /// obtaining supported capabilities, which may be a sub-set of the desired capabilities. 
pub fn handshake( mut process: std::process::Child, welcome_prefix: &str, versions: &[usize], desired_capabilities: &[&str], ) -> Result { let mut out = gix_packetline::Writer::new(process.stdin.take().expect("configured stdin when spawning")); out.write_all(format!("{welcome_prefix}-client").as_bytes())?; for version in versions { out.write_all(format!("version={version}").as_bytes())?; } gix_packetline::encode::flush_to_write(out.inner_mut())?; out.flush()?; let mut input = gix_packetline::StreamingPeekableIter::new( process.stdout.take().expect("configured stdout when spawning"), &[gix_packetline::PacketLineRef::Flush], false, /* packet tracing */ ); let mut read = input.as_read(); let mut buf = String::new(); read.read_line_to_string(&mut buf)?; if buf .strip_prefix(welcome_prefix) .map_or(true, |rest| rest.trim_end() != "-server") { return Err(handshake::Error::Protocol { msg: format!("Wanted '{welcome_prefix}-server, got "), actual: buf, }); } let chosen_version; buf.clear(); read.read_line_to_string(&mut buf)?; match buf .strip_prefix("version=") .and_then(|version| usize::from_str(version.trim_end()).ok()) { Some(version) => { chosen_version = version; } None => { return Err(handshake::Error::Protocol { msg: "Needed 'version=', got ".into(), actual: buf, }) } } if !versions.contains(&chosen_version) { return Err(handshake::Error::Protocol { msg: format!("Server offered {chosen_version}, we only support "), actual: versions.iter().map(ToString::to_string).collect::>().join(", "), }); } if read.read_line_to_string(&mut buf)? 
!= 0 { return Err(handshake::Error::Protocol { msg: "expected flush packet, got".into(), actual: buf, }); } for capability in desired_capabilities { out.write_all(format!("capability={capability}").as_bytes())?; } gix_packetline::encode::flush_to_write(out.inner_mut())?; out.flush()?; read.reset_with(&[gix_packetline::PacketLineRef::Flush]); let mut capabilities = HashSet::new(); loop { buf.clear(); let num_read = read.read_line_to_string(&mut buf)?; if num_read == 0 { break; } match buf.strip_prefix("capability=") { Some(cap) => { let cap = cap.trim_end(); if !desired_capabilities.contains(&cap) { return Err(handshake::Error::UnsupportedCapability { name: cap.into() }); } capabilities.insert(cap.to_owned()); } None => continue, } } drop(read); Ok(Client { child: process, out: input, input: out, capabilities, version: chosen_version, }) } /// Invoke `command` and send all `meta` data before sending all `content` in full. pub fn invoke( &mut self, command: &str, meta: &mut dyn Iterator, content: &mut dyn std::io::Read, ) -> Result { self.send_command_and_meta(command, meta)?; std::io::copy(content, &mut self.input)?; gix_packetline::encode::flush_to_write(self.input.inner_mut())?; self.input.flush()?; Ok(self.read_status()?) } /// Invoke `command` while passing `meta` data, but don't send any content, and return their status. /// Call `inspect_line` for each line that we see as command response. /// /// This is for commands that don't expect a content stream. 
pub fn invoke_without_content<'a>( &mut self, command: &str, meta: &mut dyn Iterator, inspect_line: &mut dyn FnMut(&BStr), ) -> Result { self.send_command_and_meta(command, meta)?; while let Some(data) = self.out.read_line() { let line = data??; if let Some(line) = line.as_text() { inspect_line(line.as_bstr()); } } self.out.reset_with(&[gix_packetline::PacketLineRef::Flush]); let status = self.read_status()?; Ok(status) } /// Return a `Read` implementation that reads the server process output until the next flush package, and validates /// the status. If the status indicates failure, the last read will also fail. pub fn as_read(&mut self) -> impl std::io::Read + '_ { self.out.reset_with(&[gix_packetline::PacketLineRef::Flush]); ReadProcessOutputAndStatus { inner: self.out.as_read(), } } /// Read a `status=` line from the process output until it is exhausted. /// Note that the last sent status line wins and no status line means that the `Previous` still counts. pub fn read_status(&mut self) -> std::io::Result { read_status(&mut self.out.as_read()) } } impl Client { fn send_command_and_meta( &mut self, command: &str, meta: &mut dyn Iterator, ) -> Result<(), invoke::Error> { self.input.write_all(format!("command={command}").as_bytes())?; let mut buf = BString::default(); for (key, value) in meta { buf.clear(); buf.push_str(key); buf.push(b'='); buf.push_str(&value); self.input.write_all(&buf)?; } gix_packetline::encode::flush_to_write(self.input.inner_mut())?; Ok(()) } } fn read_status(read: &mut PacketlineReader<'_>) -> std::io::Result { let mut status = process::Status::Previous; let mut buf = String::new(); let mut count = 0; loop { buf.clear(); let num_read = read.read_line_to_string(&mut buf)?; if num_read == 0 { break; } if let Some(name) = buf.strip_prefix("status=") { status = process::Status::Named(name.trim_end().into()); } count += 1; } if count > 0 && matches!(status, process::Status::Previous) { status = process::Status::Unset; } 
read.reset_with(&[gix_packetline::PacketLineRef::Flush]); Ok(status) } struct ReadProcessOutputAndStatus<'a> { inner: PacketlineReader<'a>, } impl<'a> std::io::Read for ReadProcessOutputAndStatus<'a> { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let num_read = self.inner.read(buf)?; if num_read == 0 { self.inner.reset_with(&[gix_packetline::PacketLineRef::Flush]); let status = read_status(&mut self.inner)?; if status.is_success() { Ok(0) } else { Err(std::io::Error::new( std::io::ErrorKind::Other, format!( "Process indicated error after reading: {}", status.message().unwrap_or_default() ), )) } } else { Ok(num_read) } } } /// Access impl Client { /// Return the list of capabilities reported by the serving process. pub fn capabilities(&self) -> &Capabilities { &self.capabilities } /// Return the mutable list of capabilities reported by the serving process. pub fn capabilities_mut(&mut self) -> &mut Capabilities { &mut self.capabilities } /// Return the negotiated version of the protocol. /// /// Note that it is the highest one that both the client and the server support. pub fn version(&self) -> usize { self.version } } /// Lifecycle impl Client { /// Return the child handle of the running process. /// /// Note that this will naturally close input and output handles, which is a signal for the child process to shutdown. pub fn into_child(self) -> std::process::Child { self.child } } gix-filter-0.11.1/src/driver/process/mod.rs000064400000000000000000000076131046102023000166520ustar 00000000000000use std::collections::HashSet; /// A set of capabilities that have been negotiated between client and server. pub type Capabilities = HashSet; /// A handle to a client that allows communicating to a long-running process. pub struct Client { /// The child process we are communicating with. child: std::process::Child, /// The names of the obtained capabilities after the handshake. capabilities: Capabilities, /// The negotiated version of the protocol. 
version: usize, /// A way to send packet-line encoded information to the process. input: gix_packetline::Writer, /// A way to read information sent to us by the process. out: gix_packetline::StreamingPeekableIter, } /// A handle to facilitate typical server interactions that include the handshake and command-invocations. pub struct Server { /// The names of the capabilities we can expect the client to use. capabilities: Capabilities, /// The negotiated version of the protocol, it's the highest supported one. version: usize, /// A way to receive information from the client. input: gix_packetline::StreamingPeekableIter>, /// A way to send information to the client. out: gix_packetline::Writer>, } /// The return status of an [invoked command][Client::invoke()]. #[derive(Debug, Clone)] pub enum Status { /// No new status was set, and nothing was sent, so instead we are to assume the previous status is still in effect. Previous, /// Something was sent, but we couldn't identify it as status. Unset, /// Assume the given named status. Named(String), } /// Initialization impl Status { /// Create a new instance that represents a successful operation. pub fn success() -> Self { Status::Named("success".into()) } /// Create a new instance that represents a delayed operation. pub fn delayed() -> Self { Status::Named("delayed".into()) } /// Create a status that indicates to the client that the command that caused it will not be run anymore throughout the lifetime /// of the process. However, other commands may still run. pub fn abort() -> Self { Status::Named("abort".into()) } /// Create a status that makes the client send a kill signal. pub fn exit() -> Self { Status::Named("send-term-signal".into()) } /// Create a new instance that represents an error with the given `message`. 
pub fn error(message: impl Into) -> Self { Status::Named(message.into()) } } /// Access impl Status { /// Note that this is assumed true even if no new status is set, hence we assume that upon error, the caller will not continue /// interacting with the process. pub fn is_success(&self) -> bool { match self { Status::Previous => true, Status::Unset => false, Status::Named(n) => n == "success", } } /// Returns true if this is an `abort` status. pub fn is_abort(&self) -> bool { self.message().map_or(false, |m| m == "abort") } /// Return true if the status is explicitly set to indicated delayed output processing pub fn is_delayed(&self) -> bool { match self { Status::Previous | Status::Unset => false, Status::Named(n) => n == "delayed", } } /// Return the status message if present. pub fn message(&self) -> Option<&str> { match self { Status::Previous | Status::Unset => None, Status::Named(msg) => msg.as_str().into(), } } } /// #[allow(clippy::empty_docs)] pub mod client; /// #[allow(clippy::empty_docs)] pub mod server; type PacketlineReader<'a, T = std::process::ChildStdout> = gix_packetline::read::WithSidebands<'a, T, fn(bool, &[u8]) -> gix_packetline::read::ProgressAction>; gix-filter-0.11.1/src/driver/process/server.rs000064400000000000000000000234631046102023000174020ustar 00000000000000use std::{collections::HashSet, io::Write, str::FromStr}; use bstr::{BString, ByteSlice}; use crate::driver::process::Server; /// A request to be handled by the server, typically done in a loop. pub struct Request<'a> { parent: &'a mut Server, /// The command to execute with this request. pub command: String, /// A list of key-value pairs of meta-data related to `command`. pub meta: Vec<(String, BString)>, } /// #[allow(clippy::empty_docs)] pub mod next_request { use bstr::BString; /// The error returned by [Server::next_request()][super::Server::next_request()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to read from the client")] Io(#[from] std::io::Error), #[error("{msg} '{actual}'")] Protocol { msg: String, actual: BString }, #[error(transparent)] PacketlineDecode(#[from] gix_packetline::decode::Error), } } /// #[allow(clippy::empty_docs)] pub mod handshake { /// The error returned by [Server::handshake()][super::Server::handshake()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Failed to read or write to the client")] Io(#[from] std::io::Error), #[error("{msg} '{actual}'")] Protocol { msg: String, actual: String }, #[error("Could not select supported version from the one sent by the client: {}", actual.iter().map(ToString::to_string).collect::>().join(", "))] VersionMismatch { actual: Vec }, } } impl Server { /// Perform a handshake with the client sending information to our `stdin` and receiving information through our `stdout` /// in packetline format. /// `pick_version` is called with all versions supported by the client to pick one from, or `None` to indicate the handshake /// should stop. /// Use `available_capabilities` to match our capabilities with the ones from the client, so we communicate at most a subset of these. /// /// ### Note /// /// The server claims exclusive access over stdout and stdin, so all kinds of other output has to be steered towards stderr or there /// will be a deadlock. 
pub fn handshake( stdin: std::io::Stdin, stdout: std::io::Stdout, welcome_prefix: &str, pick_version: &mut dyn FnMut(&[usize]) -> Option, available_capabilities: &[&str], ) -> Result { let mut input = gix_packetline::StreamingPeekableIter::new( stdin.lock(), &[gix_packetline::PacketLineRef::Flush], false, /* packet tracing */ ); let mut read = input.as_read(); let mut buf = String::new(); read.read_line_to_string(&mut buf)?; if buf .strip_prefix(welcome_prefix) .map_or(true, |rest| rest.trim_end() != "-client") { return Err(handshake::Error::Protocol { msg: format!("Expected '{welcome_prefix}-client, got"), actual: buf, }); } let mut versions = Vec::new(); loop { buf.clear(); let num_read = read.read_line_to_string(&mut buf)?; if num_read == 0 { break; } versions.push( match buf .strip_prefix("version=") .and_then(|version| usize::from_str(version.trim_end()).ok()) { Some(version) => version, None => { return Err(handshake::Error::Protocol { msg: "Expected 'version=', got".into(), actual: buf, }) } }, ); } let version = pick_version(&versions).ok_or(handshake::Error::VersionMismatch { actual: versions })?; read.reset_with(&[gix_packetline::PacketLineRef::Flush]); let mut out = gix_packetline::Writer::new(stdout.lock()); out.write_all(format!("{welcome_prefix}-server").as_bytes())?; out.write_all(format!("version={version}").as_bytes())?; gix_packetline::encode::flush_to_write(out.inner_mut())?; out.flush()?; let mut capabilities = HashSet::new(); loop { buf.clear(); let num_read = read.read_line_to_string(&mut buf)?; if num_read == 0 { break; } match buf.strip_prefix("capability=") { Some(cap) => { let cap = cap.trim_end(); if available_capabilities.contains(&cap) { capabilities.insert(cap.to_owned()); } } None => continue, }; } for cap in &capabilities { out.write_all(format!("capability={cap}").as_bytes())?; } gix_packetline::encode::flush_to_write(out.inner_mut())?; out.flush()?; drop(read); Ok(Server { capabilities, version, out, input, }) } /// Read the next 
request and return it, even if [`command`][Request::command] is *not* supported by us. /// If `Ok(None)` is reported, the request loop should end and the process should be shutdown gracefully. /// /// The reason for allowing any command is that the caller would have to match on the command anyway, and would /// have to handle invalid commands that way. /// /// ### Lifecycle /// /// Note that the process is supposed to shut-down once there are no more requests, and `git` will wait /// until it has finished. pub fn next_request(&mut self) -> Result>, next_request::Error> { let mut buf = String::new(); let mut read = self.input.as_read(); match read.read_line_to_string(&mut buf) { Ok(_) => {} Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None), Err(err) => return Err(err.into()), } let command = match buf.strip_prefix("command=").map(str::trim_end).map(ToOwned::to_owned) { Some(cmd) => cmd, None => { return Err(next_request::Error::Protocol { msg: "Wanted 'command=', got ".into(), actual: buf.into(), }) } }; let mut meta = Vec::with_capacity(1); while let Some(res) = read.read_data_line() { let line = res??; let line = line .as_bstr() .ok_or_else(|| next_request::Error::Protocol { msg: "expected data line, got ".into(), actual: format!("{line:?}").into(), })? 
.trim(); let mut tokens = line.splitn(2, |b| *b == b'='); let (key, value) = tokens .next() .zip(tokens.next()) .ok_or_else(|| next_request::Error::Protocol { msg: "Expected 'key=value' metadata, got".into(), actual: line.into(), })?; assert!(tokens.next().is_none(), "configured to yield at most two tokens"); meta.push((key.as_bstr().to_string(), value.into())) } drop(read); self.input.reset_with(&[gix_packetline::PacketLineRef::Flush]); Ok(Some(Request { parent: self, command, meta, })) } } mod request { use std::io::Write; use crate::driver::{ process, process::{server::Request, PacketlineReader}, }; impl Request<'_> { /// Turn ourselves into a reader that can read until the next flush packet. pub fn as_read(&mut self) -> PacketlineReader<'_, std::io::StdinLock<'static>> { self.parent.input.as_read() } /// Provide the write-end of the underlying process. pub fn as_write(&mut self) -> impl std::io::Write + '_ { WriteAndFlushOnDrop { inner: &mut self.parent.out, } } /// Write the `status` message followed by a flush packet. 
pub fn write_status(&mut self, status: process::Status) -> std::io::Result<()> { let out = &mut self.parent.out; if let Some(message) = status.message() { out.write_all(format!("status={message}").as_bytes())?; } gix_packetline::encode::flush_to_write(out.inner_mut())?; out.flush() } } impl std::fmt::Debug for Request<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Request") .field("command", &self.command) .field("meta", &self.meta) .finish() } } struct WriteAndFlushOnDrop<'a> { inner: &'a mut gix_packetline::Writer>, } impl std::io::Write for WriteAndFlushOnDrop<'_> { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.inner.write(buf) } fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() } } impl Drop for WriteAndFlushOnDrop<'_> { fn drop(&mut self) { gix_packetline::encode::flush_to_write(self.inner.inner_mut()).ok(); self.inner.flush().ok(); } } } /// Access impl Server { /// Return the list of capabilities we are allowed to use, as negotiated with the client. pub fn capabilities(&self) -> &HashSet { &self.capabilities } /// Return the negotiated version of the protocol. pub fn version(&self) -> usize { self.version } } gix-filter-0.11.1/src/driver/shutdown.rs000064400000000000000000000023641046102023000162660ustar 00000000000000use bstr::BString; use crate::driver::State; /// #[derive(Debug, Copy, Clone)] pub enum Mode { /// Wait for long-running processes after signaling them to shut down by closing their input and output. WaitForProcesses, /// Do not do anything with long-running processes, which typically allows them to keep running or shut down on their own time. /// This is the fastest mode as no synchronization happens at all. Ignore, } /// Lifecycle impl State { /// Handle long-running processes according to `mode`. If an error occurs, all remaining processes will be ignored automatically. 
/// Return a list of `(process, Option)` pub fn shutdown(self, mode: Mode) -> Result)>, std::io::Error> { let mut out = Vec::with_capacity(self.running.len()); for (cmd, client) in self.running { match mode { Mode::WaitForProcesses => { let mut child = client.into_child(); let status = child.wait()?; out.push((cmd, Some(status))); } Mode::Ignore => { out.push((cmd, None)); } } } Ok(out) } } gix-filter-0.11.1/src/eol/convert_to_git.rs000064400000000000000000000146011046102023000167210ustar 00000000000000use std::path::{Path, PathBuf}; /// Additional context for use with [`convert_to_git`][super::convert_to_git()]. #[derive(Default, Copy, Clone)] pub struct Options<'a> { /// How to perform round-trip checks. pub round_trip_check: Option>, /// Configuration related to EOL. pub config: crate::eol::Configuration, } /// The kind of round-trip check to perform when converting line endings to `git`, i.e. `CRLF` to `LF`. #[derive(Debug, Copy, Clone)] pub enum RoundTripCheck<'a> { /// Fail with an error if conversion isn't round-trip safe. Fail { /// The repository-relative path of the file to check. Used in case of error. rela_path: &'a Path, }, /// Emit a warning using `gix_trace::warn!`, but don't fail. /// /// Note that the parent application has to setup tracing to make these events visible, along with a parent `span!`. Warn { /// The repository-relative path of the file to check. Used in case of error. rela_path: &'a Path, }, } /// The error returned by [convert_to_git()][super::convert_to_git()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("{msg} in '{}'", path.display())] RoundTrip { msg: &'static str, path: PathBuf }, #[error("Could not obtain index object to check line endings for")] FetchObjectFromIndex(#[source] Box), #[error("Could not allocate buffer")] OutOfMemory(#[from] std::collections::TryReserveError), } /// A function that writes a buffer like `fn(&mut buf)` with by tes of an object in the index that is the one that should be converted. pub type IndexObjectFn<'a> = dyn FnMut(&mut Vec) -> Result, Box> + 'a; pub(crate) mod function { use bstr::ByteSlice; use crate::{ clear_and_set_capacity, eol::{ convert_to_git::{Error, IndexObjectFn, Options, RoundTripCheck}, AttributesDigest, Stats, }, }; /// Given a `src` buffer, change it `git` (`\n`) line endings and store the result in `buf`. /// Return `true` if `buf` was written or `false` if nothing had to be done. /// Depending on the state in `buf`, `index_object` is called to write the version of `src` as stored in the index /// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists. /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the /// outcome of this function. /// If `round_trip_check` is not `None`, round-tripping will be validated and handled accordingly. pub fn convert_to_git( src: &[u8], digest: AttributesDigest, buf: &mut Vec, index_object: &mut IndexObjectFn<'_>, Options { round_trip_check, config, }: Options<'_>, ) -> Result { if digest == AttributesDigest::Binary || src.is_empty() { return Ok(false); } let stats = Stats::from_bytes(src); let mut convert_crlf_to_lf = stats.crlf > 0; if digest.is_auto_text() { // In this mode, we are supposed to figure out ourselves if we should convert or not. if stats.is_binary() { return Ok(false); } if let Some(()) = index_object(buf).map_err(Error::FetchObjectFromIndex)? 
{ let has_crlf_in_index = buf .find_byte(b'\r') .map(|_| Stats::from_bytes(buf)) .filter(|s| !s.is_binary() && s.crlf > 0) .is_some(); if has_crlf_in_index { convert_crlf_to_lf = false; } } } if let Some(round_trip_check) = round_trip_check { let mut new_stats = stats; // simulate to-git conversion/git-add if convert_crlf_to_lf { new_stats.lone_lf += new_stats.crlf; new_stats.crlf = 0; } // simulate worktree checkout if new_stats.will_convert_lf_to_crlf(digest, config) { new_stats.crlf += new_stats.lone_lf; new_stats.lone_lf = 0; } if stats.crlf > 0 && new_stats.crlf == 0 { // CRLF would not be restored by checkout match round_trip_check { RoundTripCheck::Fail { rela_path } => { return Err(Error::RoundTrip { msg: "CRLF would be replaced by LF", path: rela_path.to_owned(), }) } #[allow(unused_variables)] RoundTripCheck::Warn { rela_path } => { gix_trace::warn!( "in the working copy of '{}', CRLF will be replaced by LF next time git touches it", rela_path.display() ) } } } else if stats.lone_lf > 0 && new_stats.lone_lf == 0 { // CRLF would be added by checkout match round_trip_check { RoundTripCheck::Fail { rela_path } => { return Err(Error::RoundTrip { msg: "LF would be replaced by CRLF", path: rela_path.to_owned(), }) } #[allow(unused_variables)] RoundTripCheck::Warn { rela_path } => { gix_trace::warn!( "in the working copy of '{}', LF will be replaced by CRLF next time git touches it", rela_path.display() ) } } } } if !convert_crlf_to_lf { return Ok(false); } clear_and_set_capacity(buf, src.len() - stats.crlf)?; if stats.lone_cr == 0 { buf.extend(src.iter().filter(|b| **b != b'\r')); } else { let mut bytes = src.iter().peekable(); while let Some(b) = bytes.next() { if !(*b == b'\r' && bytes.peek() == Some(&&b'\n')) { buf.push(*b); } } } Ok(true) } } gix-filter-0.11.1/src/eol/convert_to_worktree.rs000064400000000000000000000027521046102023000200040ustar 00000000000000use bstr::{ByteSlice, ByteVec}; use crate::{ clear_and_set_capacity, eol::{AttributesDigest, 
Configuration, Mode, Stats}, }; /// Convert all `\n` in `src` to `crlf` if `digest` and `config` indicate it, returning `true` if `buf` holds the result, or `false` /// if no change was made after all. pub fn convert_to_worktree( src: &[u8], digest: AttributesDigest, buf: &mut Vec, config: Configuration, ) -> Result { if src.is_empty() || digest.to_eol(config) != Some(Mode::CrLf) { return Ok(false); } let stats = Stats::from_bytes(src); if !stats.will_convert_lf_to_crlf(digest, config) { return Ok(false); } clear_and_set_capacity(buf, src.len() + stats.lone_lf)?; let mut ofs = 0; while let Some(pos) = src[ofs..].find_byteset(b"\r\n") { match src[ofs + pos] { b'\r' => { if src.get(ofs + pos + 1) == Some(&b'\n') { buf.push_str(&src[ofs..][..pos + 2]); ofs += pos + 2; } else { buf.push_str(&src[ofs..][..pos + 1]); ofs += pos + 1; } } b'\n' => { buf.push_str(&src[ofs..][..pos]); buf.push_str(b"\r\n"); ofs += pos + 1; } _ => unreachable!("would only find one of two possible values"), } } buf.push_str(&src[ofs..]); Ok(true) } gix-filter-0.11.1/src/eol/mod.rs000064400000000000000000000056111046102023000144540ustar 00000000000000/// #[allow(clippy::empty_docs)] pub mod convert_to_git; pub use convert_to_git::function::convert_to_git; mod convert_to_worktree; pub use convert_to_worktree::convert_to_worktree; mod utils; /// The kind of end of lines to set. /// /// The default is implemented to be the native line ending for the current platform. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Mode { /// Equivalent to `git` (`\n`) line-endings. Lf, /// Equivalent to `windows` (`\r\n`) line-endings. CrLf, } /// Possible states for the `core.autocrlf`. #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] pub enum AutoCrlf { /// The same as if the `text eol=lf` attribute is set. Input, /// The same as if the `text eol=crlf` attribute is set. Enabled, /// No conversion is performed. 
#[default] Disabled, } /// The combination of `crlf`, `text` and `eol` attributes into one neat package. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum AttributesDigest { /// Equivalent to the `-text` attribute. Binary, /// Equivalent to the `text` attribute. Text, /// Equivalent to the `text eol=lf` attributes. TextInput, /// Equivalent to the `text eol=crlf` attributes. TextCrlf, /// Equivalent to the `text=auto` attributes. TextAuto, /// Equivalent to the `text=auto eol=crlf` attributes. TextAutoCrlf, /// Equivalent to the `text=auto eol=lf` attributes. TextAutoInput, } impl From for AttributesDigest { fn from(value: Mode) -> Self { match value { Mode::Lf => AttributesDigest::TextInput, Mode::CrLf => AttributesDigest::TextCrlf, } } } impl From for AttributesDigest { fn from(value: AutoCrlf) -> Self { match value { AutoCrlf::Input => AttributesDigest::TextAutoInput, AutoCrlf::Enabled => AttributesDigest::TextAutoCrlf, AutoCrlf::Disabled => AttributesDigest::Binary, } } } /// Git Configuration that affects how CRLF conversions are applied. #[derive(Default, Debug, Copy, Clone)] pub struct Configuration { /// Corresponds to `core.autocrlf`. pub auto_crlf: AutoCrlf, /// Corresponds to `core.eol`, and is `None` if unset or set to `native`, or `Some()` respectively. pub eol: Option, } /// Statistics about a buffer that helps to safely perform EOL conversions #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Stats { /// The amount of null bytes. pub null: usize, /// The amount of lone carriage returns (`\r`). pub lone_cr: usize, /// The amount of lone line feeds (`\n`). pub lone_lf: usize, /// The amount carriage returns followed by line feeds pub crlf: usize, /// The estimate of printable characters. pub printable: usize, /// The estimate of characters that can't be printed. 
pub non_printable: usize, } gix-filter-0.11.1/src/eol/utils.rs000064400000000000000000000074051046102023000150400ustar 00000000000000use crate::eol::{AttributesDigest, AutoCrlf, Configuration, Mode, Stats}; impl Default for Mode { fn default() -> Self { if cfg!(windows) { Mode::CrLf } else { Mode::Lf } } } impl AttributesDigest { /// Return the end-of-line mode this digest would require, or `None` if no conversion would be performed. pub fn to_eol(&self, config: Configuration) -> Option { Some(match self { AttributesDigest::Binary => return None, AttributesDigest::TextInput | AttributesDigest::TextAutoInput => Mode::Lf, AttributesDigest::TextCrlf | AttributesDigest::TextAutoCrlf => Mode::CrLf, AttributesDigest::Text | AttributesDigest::TextAuto => config.to_eol(), }) } /// Return true if this digest allows for auto-determination of CRLF text conversion. pub fn is_auto_text(&self) -> bool { matches!( self, AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput ) } } impl Configuration { /// Return the line-ending mode that is configured here. pub fn to_eol(&self) -> Mode { match self.auto_crlf { AutoCrlf::Enabled => Mode::CrLf, AutoCrlf::Input => Mode::Lf, AutoCrlf::Disabled => self.eol.unwrap_or_default(), } } } impl Stats { /// Gather statistics from the given `bytes`. /// /// Note that the entire buffer will be scanned. 
pub fn from_bytes(bytes: &[u8]) -> Self {
    // Single pass over the buffer; a peekable iterator lets us consume `\r\n` pairs as one CRLF.
    let mut bytes = bytes.iter().peekable();
    let mut null = 0;
    let mut lone_cr = 0;
    let mut lone_lf = 0;
    let mut crlf = 0;
    let mut printable = 0;
    let mut non_printable = 0;
    while let Some(b) = bytes.next() {
        if *b == b'\r' {
            match bytes.peek() {
                // `\r` directly followed by `\n` counts as one CRLF; skip the `\n`
                // so it isn't also counted as a lone LF below.
                Some(n) if **n == b'\n' => {
                    bytes.next();
                    crlf += 1
                }
                _ => lone_cr += 1,
            }
            continue;
        }
        if *b == b'\n' {
            lone_lf += 1;
            continue;
        }
        if *b == 127 {
            // DEL is treated as non-printable.
            non_printable += 1;
        } else if *b < 32 {
            match *b {
                // Backspace, tab, escape and form-feed are control characters that still count as printable.
                8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1,
                // NUL bytes are counted separately as well, as they strongly indicate binary content.
                0 => {
                    non_printable += 1;
                    null += 1;
                },
                _ => non_printable += 1,
            }
        } else {
            printable += 1;
        }
    }
    Self {
        null,
        lone_cr,
        lone_lf,
        crlf,
        printable,
        non_printable,
    }
}
/// Returns `true` if these statistics are typical for a binary file.
pub fn is_binary(&self) -> bool {
    // Heuristic: any lone `\r` or NUL byte, or more than one non-printable
    // per 128 printable bytes (`printable >> 7`), marks the buffer as binary.
    self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable
}
/// Return `true` if we would convert the buffer from which these stats are derived, knowing only the digest
pub fn will_convert_lf_to_crlf(&self, digest: AttributesDigest, config: Configuration) -> bool {
    // Conversion only ever happens when the effective end-of-line mode is CRLF.
    if digest.to_eol(config) != Some(Mode::CrLf) {
        return false;
    }
    // nothing to do?
    if self.lone_lf == 0 {
        return false;
    }
    if digest.is_auto_text() {
        if self.is_binary() {
            return false;
        }
        // Lone `\r` or mixed LF and CRLF isn't safe as it won't round-trip, and in auto-mode we don't touch it.
        if self.lone_cr > 0 || self.crlf > 0 {
            return false;
        }
    }
    true
}
}
gix-filter-0.11.1/src/ident.rs000064400000000000000000000055701046102023000142250ustar 00000000000000use std::ops::Range;
use bstr::{ByteSlice, ByteVec};
use crate::clear_and_set_capacity;
/// Undo identifiers like `$Id: <hash>$` to `$Id$` in `src` and write to `buf`. Newlines between dollars are ignored.
/// Return `true` if `buf` was written or `false` if `src` was left unaltered (as there was nothing to do).
pub fn undo(src: &[u8], buf: &mut Vec) -> Result { fn find_range(input: &[u8]) -> Option> { let mut ofs = 0; loop { let mut cursor = input.get(ofs..)?; let start = cursor.find(b"$Id:")?; cursor = cursor.get((start + 4)..)?; let maybe_end = cursor.find_byteset(b"$\n")?; if cursor[maybe_end] == b'\n' { ofs += start + 4 + maybe_end + 1; continue; } else { return Some((ofs + start)..(ofs + start + 4 + maybe_end + 1)); } } } let mut ofs = 0; let mut initialized = false; while let Some(range) = find_range(&src[ofs..]) { if !initialized { clear_and_set_capacity(buf, src.len())?; initialized = true; } buf.push_str(&src[ofs..][..range.start]); buf.push_str(b"$Id$"); ofs += range.end; } if initialized { buf.push_str(&src[ofs..]); } Ok(initialized) } /// Substitute all occurrences of `$Id$` with `$Id: $` if present in `src` and write all changes to `buf`, /// with `object_hash` being used accordingly. Return `true` if `buf` was written to or `false` if no change was made /// (as there was nothing to do). /// /// ### Deviation /// /// `Git` also tries to cleanup 'stray' substituted `$Id: $`, but we don't do that, sticking exactly to what ought to be done. /// The respective code is up to 16 years old and one might assume that `git` by now handles checking and checkout filters correctly. 
pub fn apply( src: &[u8], object_hash: gix_hash::Kind, buf: &mut Vec, ) -> Result { const HASH_LEN: usize = ": ".len() + gix_hash::Kind::longest().len_in_hex(); let mut id = None; let mut ofs = 0; while let Some(pos) = src[ofs..].find(b"$Id$") { let id = match id { None => { let new_id = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, src); id = new_id.into(); clear_and_set_capacity(buf, src.len() + HASH_LEN)?; // pre-allocate for one ID new_id } Some(id) => id.to_owned(), }; buf.push_str(&src[ofs..][..pos + 3]); buf.push_str(b": "); id.write_hex_to(&mut *buf).expect("writes to memory always work"); buf.push(b'$'); ofs += pos + 4; } if id.is_some() { buf.push_str(&src[ofs..]); } Ok(id.is_some()) } gix-filter-0.11.1/src/lib.rs000064400000000000000000000072721046102023000136710ustar 00000000000000//! A library for implementing everything needed to deal with git filter pipelines. //! //! Generally, multiple filters are applied in a row forming a pipeline, with each filter being a stage in that pipeline. //! This pipeline is pre-determined with each stage being configurable. //! //! The transformation on an input buffer goes in two ways: either a filter is applied, or its effects are undone. Differentiating //! between these states is important to avoid comparing unfiltered buffers with filtered ones, for example. //! //! This crate implements the building blocks in terms of applying and undoing filters, along with logic to decide whether //! or not to apply such a filter. #![deny(rust_2018_idioms, missing_docs, unsafe_code)] use bstr::BString; /// A forwarding of the `encoding_rs` crate for its types and convenience. pub use encoding_rs as encoding; /// The `gix-attributes` crate whose types are mentioned in the public API of [Pipeline::convert_to_worktree()]. pub use gix_attributes as attributes; /// a filter to replace `$Id$` with a git-hash of the buffer. 
pub mod ident; /// convert line endings in buffers pub mod eol; /// change encodings based on the `working-tree-encoding` attribute. pub mod worktree; /// use filter programs to perform any kind of conversion. pub mod driver; /// #[allow(clippy::empty_docs)] pub mod pipeline; /// The standard git filter pipeline comprised of multiple standard filters and support for external filters. /// /// It's configuring itself for each provided path based on the path's attributes, implementing the complex logic that governs it. #[derive(Clone)] pub struct Pipeline { /// Various options that are all defaultable. options: pipeline::Options, /// Storage for the attributes of each item we should process, configured for use with all attributes that concern us. attrs: gix_attributes::search::Outcome, /// Additional context to pass to process filters. context: pipeline::Context, /// State needed to keep running filter processes. processes: driver::State, /// A utility to handle multiple buffers to keep results of various filters. bufs: gix_utils::Buffers, } /// A declaration of a driver program. /// /// It consists of up to three program declarations. #[derive(Debug, Clone)] pub struct Driver { /// The name of the driver as stored in the configuration. pub name: BString, /// The program invocation that cleans a worktree file for storage in `git`. /// /// Note that the command invocation may need its `%f` argument substituted with the name of the file to process. It will be quoted. pub clean: Option, /// The program invocation that readies a file stored in `git` for the worktree. /// /// Note that the command invocation may need its `%f` argument substituted with the name of the file to process. It will be quoted. pub smudge: Option, /// the long-running program that can typically handle both smudge and clean, and possibly delay processing as well. pub process: Option, /// If `true`, the `clean` or `smudge` programs need to succeed in order to make their content usable. 
Otherwise their /// exit code is ignored. /// Note that this is more of a suggestion as we will always report errors as they happen as the driver API is streaming in nature, /// which makes soft-failures impossible unless the caller takes precautions. pub required: bool, } fn clear_and_set_capacity(buf: &mut Vec, cap: usize) -> Result<(), std::collections::TryReserveError> { buf.clear(); if buf.capacity() < cap { buf.try_reserve(cap)?; debug_assert!(buf.capacity() >= cap, "{} >= {}", buf.capacity(), cap); } Ok(()) } gix-filter-0.11.1/src/pipeline/convert.rs000064400000000000000000000330671046102023000164110ustar 00000000000000use std::{io::Read, path::Path}; use bstr::BStr; use crate::{driver, eol, ident, pipeline::util::Configuration, worktree, Pipeline}; /// #[allow(clippy::empty_docs)] pub mod configuration { use bstr::BString; /// Errors related to the configuration of filter attributes. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("The encoding named '{name}' isn't available")] UnknownEncoding { name: BString }, #[error("Encodings must be names, like UTF-16, and cannot be booleans.")] InvalidEncoding, } } /// #[allow(clippy::empty_docs)] pub mod to_git { /// A function that fills `buf` `fn(&mut buf)` with the data stored in the index of the file that should be converted. pub type IndexObjectFn<'a> = dyn FnMut(&mut Vec) -> Result, gix_object::find::Error> + 'a; /// The error returned by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Eol(#[from] crate::eol::convert_to_git::Error), #[error(transparent)] Worktree(#[from] crate::worktree::encode_to_git::Error), #[error(transparent)] Driver(#[from] crate::driver::apply::Error), #[error(transparent)] Configuration(#[from] super::configuration::Error), #[error("Copy of driver process output to memory failed")] ReadProcessOutputToBuffer(#[from] std::io::Error), #[error("Could not allocate buffer")] OutOfMemory(#[from] std::collections::TryReserveError), } } /// #[allow(clippy::empty_docs)] pub mod to_worktree { /// The error returned by [Pipeline::convert_to_worktree()][super::Pipeline::convert_to_worktree()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Worktree(#[from] crate::worktree::encode_to_worktree::Error), #[error(transparent)] Driver(#[from] crate::driver::apply::Error), #[error(transparent)] Configuration(#[from] super::configuration::Error), #[error("Could not allocate buffer")] OutOfMemory(#[from] std::collections::TryReserveError), } } /// Access impl Pipeline { /// Convert a `src` stream (to be found at `rela_path`) to a representation suitable for storage in `git` /// based on the `attributes` at `rela_path` which is passed as first argument.. /// When converting to `crlf`, and depending on the configuration, `index_object` might be called to obtain the index /// version of `src` if available. It can return `Ok(None)` if this information isn't available. 
pub fn convert_to_git( &mut self, mut src: R, rela_path: &Path, attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome), index_object: &mut to_git::IndexObjectFn<'_>, ) -> Result, to_git::Error> where R: std::io::Read, { let bstr_path = gix_path::into_bstr(rela_path); let Configuration { driver, digest, _attr_digest: _, encoding, apply_ident_filter, } = Configuration::at_path( bstr_path.as_ref(), &self.options.drivers, &mut self.attrs, attributes, self.options.eol_config, )?; let mut in_buffer = false; // this is just an approximation, but it's as good as it gets without reading the actual input. let would_convert_eol = eol::convert_to_git( b"\r\n", digest, &mut self.bufs.dest, &mut |_| Ok(None), eol::convert_to_git::Options { round_trip_check: None, config: self.options.eol_config, }, )?; if let Some(driver) = driver { if let Some(mut read) = self.processes.apply( driver, &mut src, driver::Operation::Clean, self.context.with_path(bstr_path.as_ref()), )? { if !apply_ident_filter && encoding.is_none() && !would_convert_eol { // Note that this is not typically a benefit in terms of saving memory as most filters // aren't expected to make the output file larger. It's more about who is waiting for the filter's // output to arrive, which won't be us now. For `git-lfs` it definitely won't matter though. 
return Ok(ToGitOutcome::Process(read)); } self.bufs.clear(); read.read_to_end(&mut self.bufs.src)?; in_buffer = true; } } if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { self.bufs.clear(); src.read_to_end(&mut self.bufs.src)?; in_buffer = true; } if let Some(encoding) = encoding { worktree::encode_to_git( &self.bufs.src, encoding, &mut self.bufs.dest, if self.options.encodings_with_roundtrip_check.contains(&encoding) { worktree::encode_to_git::RoundTripCheck::Fail } else { worktree::encode_to_git::RoundTripCheck::Skip }, )?; self.bufs.swap(); } if eol::convert_to_git( &self.bufs.src, digest, &mut self.bufs.dest, &mut |buf| index_object(buf), eol::convert_to_git::Options { round_trip_check: self.options.crlf_roundtrip_check.to_eol_roundtrip_check(rela_path), config: self.options.eol_config, }, )? { self.bufs.swap(); } if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest)? { self.bufs.swap(); } Ok(if in_buffer { ToGitOutcome::Buffer(&self.bufs.src) } else { ToGitOutcome::Unchanged(src) }) } /// Convert a `src` buffer located at `rela_path` (in the index) from what's in `git` to the worktree representation, /// asking for `attributes` with `rela_path` as first argument to configure the operation automatically. /// `can_delay` defines if long-running processes can delay their response, and if they *choose* to the caller has to /// specifically deal with it by interacting with the [`driver_state`][Pipeline::driver_state_mut()] directly. /// /// The reason `src` is a buffer is to indicate that `git` generally doesn't do well streaming data, so it should be small enough /// to be performant while being held in memory. This is typically the case, especially if `git-lfs` is used as intended. 
pub fn convert_to_worktree<'input>( &mut self, src: &'input [u8], rela_path: &BStr, attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome), can_delay: driver::apply::Delay, ) -> Result, to_worktree::Error> { let Configuration { driver, digest, _attr_digest: _, encoding, apply_ident_filter, } = Configuration::at_path( rela_path, &self.options.drivers, &mut self.attrs, attributes, self.options.eol_config, )?; let mut bufs = self.bufs.use_foreign_src(src); let (src, dest) = bufs.src_and_dest(); if apply_ident_filter && ident::apply(src, self.options.object_hash, dest)? { bufs.swap(); } let (src, dest) = bufs.src_and_dest(); if eol::convert_to_worktree(src, digest, dest, self.options.eol_config)? { bufs.swap(); }; if let Some(encoding) = encoding { let (src, dest) = bufs.src_and_dest(); worktree::encode_to_worktree(src, encoding, dest)?; bufs.swap(); } if let Some(driver) = driver { let (mut src, _dest) = bufs.src_and_dest(); if let Some(maybe_delayed) = self.processes.apply_delayed( driver, &mut src, driver::Operation::Smudge, can_delay, self.context.with_path(rela_path), )? { return Ok(ToWorktreeOutcome::Process(maybe_delayed)); } } Ok(match bufs.ro_src { Some(src) => ToWorktreeOutcome::Unchanged(src), None => ToWorktreeOutcome::Buffer(bufs.src), }) } } /// The result of a conversion with zero or more filters to be stored in git. pub enum ToGitOutcome<'pipeline, R> { /// The original input wasn't changed and the reader is still available for consumption. Unchanged(R), /// An external filter (and only that) was applied and its results *have to be consumed*. Process(Box), /// A reference to the result of one or more filters of which one didn't support streaming. /// /// This can happen if an `eol`, `working-tree-encoding` or `ident` filter is applied, possibly on top of an external filter. Buffer(&'pipeline [u8]), } /// The result of a conversion with zero or more filters. 
/// /// ### Panics /// /// If `std::io::Read` is used on it and the output is delayed, a panic will occur. The caller is responsible for either disallowing delayed /// results or if allowed, handle them. Use [`is_delayed()][Self::is_delayed()]. pub enum ToWorktreeOutcome<'input, 'pipeline> { /// The original input wasn't changed and the original buffer is present Unchanged(&'input [u8]), /// A reference to the result of one or more filters of which one didn't support streaming. /// /// This can happen if an `eol`, `working-tree-encoding` or `ident` filter is applied, possibly on top of an external filter. Buffer(&'pipeline [u8]), /// An external filter (and only that) was applied and its results *have to be consumed*. Note that the output might be delayed, /// which requires special handling to eventually receive it. Process(driver::apply::MaybeDelayed<'pipeline>), } impl<'input, 'pipeline> ToWorktreeOutcome<'input, 'pipeline> { /// Return true if this outcome is delayed. In that case, one isn't allowed to use [`Read`] or cause a panic. pub fn is_delayed(&self) -> bool { matches!( self, ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_)) ) } /// Returns `true` if the input buffer was actually changed, or `false` if it is returned directly. pub fn is_changed(&self) -> bool { !matches!(self, ToWorktreeOutcome::Unchanged(_)) } /// Return a buffer if we contain one, or `None` otherwise. /// /// This method is useful only if it's clear that no driver is available, which may cause a stream to be returned and not a buffer. pub fn as_bytes(&self) -> Option<&[u8]> { match self { ToWorktreeOutcome::Unchanged(b) | ToWorktreeOutcome::Buffer(b) => Some(b), ToWorktreeOutcome::Process(_) => None, } } /// Return a stream to read the drivers output from, if possible. /// /// Note that this is only the case if the driver process was applied last *and* didn't delay its output. 
pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> { match self { ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_)) | ToWorktreeOutcome::Unchanged(_) | ToWorktreeOutcome::Buffer(_) => None, ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Immediate(read)) => Some(read), } } } impl<'input, 'pipeline> std::io::Read for ToWorktreeOutcome<'input, 'pipeline> { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { match self { ToWorktreeOutcome::Unchanged(b) => b.read(buf), ToWorktreeOutcome::Buffer(b) => b.read(buf), ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_)) => { panic!("BUG: must not try to read delayed output") } ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Immediate(r)) => r.read(buf), } } } impl<'pipeline, R> std::io::Read for ToGitOutcome<'pipeline, R> where R: std::io::Read, { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { match self { ToGitOutcome::Unchanged(r) => r.read(buf), ToGitOutcome::Process(r) => r.read(buf), ToGitOutcome::Buffer(r) => r.read(buf), } } } impl<'a, R> ToGitOutcome<'a, R> where R: std::io::Read, { /// If we contain a buffer, and not a stream, return it. pub fn as_bytes(&self) -> Option<&'a [u8]> { match self { ToGitOutcome::Unchanged(_) | ToGitOutcome::Process(_) => None, ToGitOutcome::Buffer(b) => Some(b), } } /// Return a stream to read the drivers output from. This is only possible if there is only a driver, and no other filter. pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> { match self { ToGitOutcome::Process(read) => Some(read), ToGitOutcome::Unchanged(read) => Some(read), ToGitOutcome::Buffer(_) => None, } } /// Returns `true` if the input buffer was actually changed, or `false` if it is returned directly. 
pub fn is_changed(&self) -> bool { !matches!(self, ToGitOutcome::Unchanged(_)) } } gix-filter-0.11.1/src/pipeline/mod.rs000064400000000000000000000110331046102023000154750ustar 00000000000000use bstr::BString; use crate::{driver, eol, Driver, Pipeline}; /// Define how to perform CRLF round-trip checking when converting to git. #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] pub enum CrlfRoundTripCheck { /// Fail with an error if CRLF conversion isn't round-trip safe. Fail, /// Emit a warning using `gix_trace::warn!`, but don't fail. /// /// Note that the parent application has to setup tracing to make these events visible, along with a parent `span!`. #[default] Warn, /// Do nothing, do not perform round-trip check at all. Skip, } /// Additional configuration for the filter pipeline. #[derive(Default, Clone)] pub struct Options { /// Available (external) driver programs to invoke if attributes for path configure them. pub drivers: Vec, /// Global options to configure end-of-line conversions, to worktree or to git. pub eol_config: eol::Configuration, /// How to perform round-trip checks during end-of-line conversions to git. pub crlf_roundtrip_check: CrlfRoundTripCheck, /// All worktree encodings for round-trip checks should be performed. pub encodings_with_roundtrip_check: Vec<&'static encoding_rs::Encoding>, /// The object hash to use when applying the `ident` filter. pub object_hash: gix_hash::Kind, } /// Context that typically doesn't change throughout the lifetime of a pipeline, for use with `process` filters. /// /// Note that this is quite specific to third-party filters that actually make use of this additional context. #[derive(Default, Debug, Clone)] pub struct Context { /// The name of the reference that `HEAD` is pointing to. It's passed to `process` filters if present. pub ref_name: Option, /// The root-level tree that contains the current entry directly or indirectly, or the commit owning the tree (if available). 
/// /// This is passed to `process` filters if present. pub treeish: Option, /// The actual blob-hash of the data we are processing. It's passed to `process` filters if present. /// /// Note that this hash might be different from the `$Id$` of the respective `ident` filter, as the latter generates the hash itself. pub blob: Option, } const ATTRS: [&str; 6] = ["crlf", "ident", "filter", "eol", "text", "working-tree-encoding"]; /// Lifecycle impl Pipeline { /// Create a new pipeline with configured `drivers` (which should be considered safe to invoke), which are passed `context`. /// `eol_config` serves as fallback to understand how to convert line endings if no line-ending attributes are present. /// `crlf_roundtrip_check` corresponds to the git-configuration of `core.safecrlf`. /// `object_hash` is relevant for the `ident` filter. pub fn new(context: gix_command::Context, options: Options) -> Self { let mut attrs = gix_attributes::search::Outcome::default(); attrs.initialize_with_selection(&Default::default(), ATTRS); Pipeline { attrs, context: Context::default(), processes: driver::State::new(context), options, bufs: Default::default(), } } /// Turn ourselves into state managing possibly running driver processes. /// /// This can be used to control how these are terminated via [driver::State::shutdown()]. pub fn into_driver_state(self) -> driver::State { self.processes } } impl Default for Pipeline { fn default() -> Self { Pipeline::new(Default::default(), Default::default()) } } /// Access impl Pipeline { /// Return a mutable reference to the state that handles long running processes. /// Interacting with it directly allows to handle delayed results. pub fn driver_state_mut(&mut self) -> &mut driver::State { &mut self.processes } /// Provide mutable context that is made available to the process filters. 
/// /// The context set here is relevant for the [`convert_to_git()`][Self::convert_to_git()] and /// [`convert_to_worktree()`][Self::convert_to_worktree()] methods. pub fn driver_context_mut(&mut self) -> &mut Context { &mut self.context } /// Return a set of options for configuration after instantiation. pub fn options_mut(&mut self) -> &mut Options { &mut self.options } /// Return our double-buffers for reuse by the caller. pub fn buffers_mut(&mut self) -> &mut gix_utils::Buffers { &mut self.bufs } } /// #[allow(clippy::empty_docs)] pub mod convert; pub(crate) mod util; gix-filter-0.11.1/src/pipeline/util.rs000064400000000000000000000137731046102023000157100ustar 00000000000000use std::path::Path; use bstr::BStr; use gix_attributes::StateRef; use smallvec::SmallVec; use crate::{ driver, eol, eol::AttributesDigest, pipeline::{convert::configuration, Context, CrlfRoundTripCheck}, Driver, }; pub(crate) struct Configuration<'a> { pub(crate) driver: Option<&'a Driver>, /// What attributes say about CRLF handling. 
pub(crate) _attr_digest: Option, /// The final digest that includes configuration values pub(crate) digest: eol::AttributesDigest, pub(crate) encoding: Option<&'static encoding_rs::Encoding>, /// Whether or not to apply the `ident` filter pub(crate) apply_ident_filter: bool, } impl<'driver> Configuration<'driver> { pub(crate) fn at_path( rela_path: &BStr, drivers: &'driver [Driver], attrs: &mut gix_attributes::search::Outcome, attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome), config: eol::Configuration, ) -> Result, configuration::Error> { fn extract_driver<'a>(drivers: &'a [Driver], attr: &gix_attributes::search::Match<'_>) -> Option<&'a Driver> { if let StateRef::Value(name) = attr.assignment.state { drivers.iter().find(|d| d.name == name.as_bstr()) } else { None } } fn extract_encoding( attr: &gix_attributes::search::Match<'_>, ) -> Result, configuration::Error> { match attr.assignment.state { StateRef::Set | StateRef::Unset => Err(configuration::Error::InvalidEncoding), StateRef::Value(name) => encoding_rs::Encoding::for_label(name.as_bstr()) .ok_or(configuration::Error::UnknownEncoding { name: name.as_bstr().to_owned(), }) .map(|encoding| { // The working-tree-encoding is the encoding we have to expect in the working tree. // If the specified one is the default encoding, there is nothing to do. if encoding == encoding_rs::UTF_8 { None } else { Some(encoding) } }), StateRef::Unspecified => Ok(None), } } /// This is based on `git_path_check_crlf` in the git codebase. 
fn extract_crlf(attr: &gix_attributes::search::Match<'_>) -> Option { match attr.assignment.state { StateRef::Unspecified => None, StateRef::Set => Some(eol::AttributesDigest::Text), StateRef::Unset => Some(eol::AttributesDigest::Binary), StateRef::Value(v) => { if v.as_bstr() == "input" { Some(eol::AttributesDigest::TextInput) } else if v.as_bstr() == "auto" { Some(eol::AttributesDigest::TextAuto) } else { None } } } } fn extract_eol(attr: &gix_attributes::search::Match<'_>) -> Option { match attr.assignment.state { StateRef::Unspecified | StateRef::Unset | StateRef::Set => None, StateRef::Value(v) => { if v.as_bstr() == "lf" { Some(eol::Mode::Lf) } else if v.as_bstr() == "crlf" { Some(eol::Mode::CrLf) } else { None } } } } attributes(rela_path, attrs); let attrs: SmallVec<[_; crate::pipeline::ATTRS.len()]> = attrs.iter_selected().collect(); let apply_ident_filter = attrs[1].assignment.state.is_set(); let driver = extract_driver(drivers, &attrs[2]); let encoding = extract_encoding(&attrs[5])?; let mut digest = extract_crlf(&attrs[4]); if digest.is_none() { digest = extract_crlf(&attrs[0]); } if digest != Some(AttributesDigest::Binary) { let eol = extract_eol(&attrs[3]); digest = match digest { Some(AttributesDigest::TextAuto) if eol == Some(eol::Mode::Lf) => Some(AttributesDigest::TextAutoInput), Some(AttributesDigest::TextAuto) if eol == Some(eol::Mode::CrLf) => { Some(AttributesDigest::TextAutoCrlf) } _ => match eol { Some(eol::Mode::CrLf) => Some(AttributesDigest::TextCrlf), Some(eol::Mode::Lf) => Some(AttributesDigest::TextInput), _ => digest, }, }; } let attr_digest = digest; digest = match digest { None => Some(config.auto_crlf.into()), Some(AttributesDigest::Text) => Some(config.to_eol().into()), _ => digest, }; Ok(Configuration { driver, _attr_digest: attr_digest, digest: digest.expect("always set by now"), encoding, apply_ident_filter, }) } } impl Context { pub(crate) fn with_path<'a>(&self, rela_path: &'a BStr) -> driver::apply::Context<'a, '_> { 
driver::apply::Context { rela_path, ref_name: self.ref_name.as_ref().map(AsRef::as_ref), treeish: self.treeish, blob: self.blob, } } } impl CrlfRoundTripCheck { pub(crate) fn to_eol_roundtrip_check(self, rela_path: &Path) -> Option> { match self { CrlfRoundTripCheck::Fail => Some(eol::convert_to_git::RoundTripCheck::Fail { rela_path }), CrlfRoundTripCheck::Warn => Some(eol::convert_to_git::RoundTripCheck::Warn { rela_path }), CrlfRoundTripCheck::Skip => None, } } } gix-filter-0.11.1/src/worktree/encode_to_git.rs000064400000000000000000000062061046102023000175630ustar 00000000000000/// Whether or not to perform round-trip checks. #[derive(Debug, Copy, Clone)] pub enum RoundTripCheck { /// Assure that we can losslessly convert the UTF-8 result back to the original encoding or fail with an error. Fail, /// Do not check if the encoding is round-trippable. Skip, } /// The error returned by [`encode_to_git()][super::encode_to_git()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Cannot convert input of {input_len} bytes to UTF-8 without overflowing")] Overflow { input_len: usize }, #[error("The input was malformed and could not be decoded as '{encoding}'")] Malformed { encoding: &'static str }, #[error("Encoding from '{src_encoding}' to '{dest_encoding}' and back is not the same")] RoundTrip { src_encoding: &'static str, dest_encoding: &'static str, }, } pub(crate) mod function { use encoding_rs::DecoderResult; use super::{Error, RoundTripCheck}; /// Decode `src` according to `src_encoding` to `UTF-8` for storage in git and place it in `buf`. /// Note that the encoding is always applied, there is no conditional even if `src_encoding` already is `UTF-8`. 
pub fn encode_to_git( src: &[u8], src_encoding: &'static encoding_rs::Encoding, buf: &mut Vec, round_trip: RoundTripCheck, ) -> Result<(), Error> { let mut decoder = src_encoding.new_decoder_with_bom_removal(); let buf_len = decoder .max_utf8_buffer_length_without_replacement(src.len()) .ok_or(Error::Overflow { input_len: src.len() })?; buf.clear(); buf.resize(buf_len, 0); let (res, read, written) = decoder.decode_to_utf8_without_replacement(src, buf, true); match res { DecoderResult::InputEmpty => { assert!( buf_len >= written, "encoding_rs estimates the maximum amount of bytes written correctly" ); assert_eq!(read, src.len(), "input buffer should be fully consumed"); buf.truncate(written); } DecoderResult::OutputFull => { unreachable!("we assure that the output buffer is big enough as per the encoder's estimate") } DecoderResult::Malformed(_, _) => { return Err(Error::Malformed { encoding: src_encoding.name(), }) } } match round_trip { RoundTripCheck::Fail => { // SAFETY: we trust `encoding_rs` to output valid UTF-8 only if we ask it to. #[allow(unsafe_code)] let str = unsafe { std::str::from_utf8_unchecked(buf) }; let (should_equal_src, _actual_encoding, _had_errors) = src_encoding.encode(str); if should_equal_src != src { return Err(Error::RoundTrip { src_encoding: src_encoding.name(), dest_encoding: "UTF-8", }); } } RoundTripCheck::Skip => {} } Ok(()) } } gix-filter-0.11.1/src/worktree/encode_to_worktree.rs000064400000000000000000000045641046102023000206470ustar 00000000000000/// The error returned by [`encode_to_worktree()][super::encode_to_worktree()]. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Cannot convert input of {input_len} UTF-8 bytes to target encoding without overflowing")] Overflow { input_len: usize }, #[error("Input was not UTF-8 encoded")] InputAsUtf8(#[from] std::str::Utf8Error), #[error("The character '{character}' could not be mapped to the {worktree_encoding}")] Unmappable { character: char, worktree_encoding: &'static str, }, } pub(crate) mod function { use encoding_rs::EncoderResult; use super::Error; /// Encode `src_utf8`, which is assumed to be UTF-8 encoded, according to `worktree_encoding` for placement in the working directory, /// and write it to `buf`, possibly resizing it. /// Note that the encoding is always applied, there is no conditional even if `worktree_encoding` and the `src` encoding are the same. pub fn encode_to_worktree( src_utf8: &[u8], worktree_encoding: &'static encoding_rs::Encoding, buf: &mut Vec, ) -> Result<(), Error> { let mut encoder = worktree_encoding.new_encoder(); let buf_len = encoder .max_buffer_length_from_utf8_if_no_unmappables(src_utf8.len()) .ok_or(Error::Overflow { input_len: src_utf8.len(), })?; buf.clear(); buf.resize(buf_len, 0); let src = std::str::from_utf8(src_utf8)?; let (res, read, written) = encoder.encode_from_utf8_without_replacement(src, buf, true); match res { EncoderResult::InputEmpty => { assert!( buf_len >= written, "encoding_rs estimates the maximum amount of bytes written correctly" ); assert_eq!(read, src_utf8.len(), "input buffer should be fully consumed"); buf.truncate(written); } EncoderResult::OutputFull => { unreachable!("we assure that the output buffer is big enough as per the encoder's estimate") } EncoderResult::Unmappable(c) => { return Err(Error::Unmappable { worktree_encoding: worktree_encoding.name(), character: c, }) } } Ok(()) } } gix-filter-0.11.1/src/worktree/encoding.rs000064400000000000000000000020451046102023000165440ustar 00000000000000use bstr::BStr; use 
encoding_rs::Encoding; /// #[allow(clippy::empty_docs)] pub mod for_label { use bstr::BString; /// The error returned by [for_label()][super::for_label()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("An encoding named '{name}' is not known")] Unknown { name: BString }, } } /// Try to produce a new `Encoding` for `label` or report an error if it is not known. /// /// ### Deviation /// /// * There is no special handling of UTF-16LE/BE with checks if data contains a BOM or not, like `git` as we don't expect to have /// data available here. /// * Special `-BOM` suffixed versions of `UTF-16` encodings are not supported. pub fn for_label<'a>(label: impl Into<&'a BStr>) -> Result<&'static Encoding, for_label::Error> { let mut label = label.into(); if label == "latin-1" { label = "ISO-8859-1".into(); } let enc = Encoding::for_label(label.as_ref()).ok_or_else(|| for_label::Error::Unknown { name: label.into() })?; Ok(enc) } gix-filter-0.11.1/src/worktree/mod.rs000064400000000000000000000013541046102023000155370ustar 00000000000000//! Worktree encodings are powered by the `encoding_rs` crate, which has a narrower focus than the `iconv` library. Thus this implementation //! is inherently more limited but will handle the common cases. //! //! Note that for encoding to legacy formats, [additional normalization steps](https://docs.rs/encoding_rs/0.8.32/encoding_rs/#preparing-text-for-the-encoders) //! can be taken, which we do not yet take unless there is specific examples or problems to solve. /// #[allow(clippy::empty_docs)] pub mod encoding; /// #[allow(clippy::empty_docs)] pub mod encode_to_git; pub use encode_to_git::function::encode_to_git; /// #[allow(clippy::empty_docs)] pub mod encode_to_worktree; pub use encode_to_worktree::function::encode_to_worktree;