image_hasher-2.0.0/.cargo_vcs_info.json0000644000000001360000000000100134470ustar { "git": { "sha1": "812a3d2177823c06f6b14201d7d46134ad6946ac" }, "path_in_vcs": "" }image_hasher-2.0.0/.github/workflows/ci.yml000064400000000000000000000027361046102023000167620ustar 00000000000000name: Build and test on: push: pull_request: schedule: - cron: '0 0 * * 1' jobs: full_ci: strategy: fail-fast: false matrix: os: [ ubuntu-latest, macos-latest, windows-latest ] toolchain: [ nightly, stable, 1.67.1 ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Setup rust version run: rustup default ${{ matrix.toolchain }} - name: Clippy fmt run: | cargo fmt -- --check if: ${{ matrix.toolchain == 'stable' }} - name: Cargo check run: | cargo check - name: Cargo check all features run: | cargo check --all-features if: ${{ matrix.toolchain == 'nightly' }} - name: Cargo check examples run: | cargo check --examples - name: Cargo check examples all features run: | cargo check --examples --all-features if: ${{ matrix.toolchain == 'nightly' }} - name: Clippy run: | cargo clippy if: ${{ matrix.toolchain == 'stable' }} - name: Test run: | cargo test --no-fail-fast -- --nocapture if: ${{ matrix.toolchain != '1.67.1' }} # dev dependencies require at least 1.74.0 - name: Test all features nightly run: cargo test --all-features --no-fail-fast -- --nocapture if: ${{ matrix.toolchain == 'nightly' }} - name: Doc run: | cargo doc image_hasher-2.0.0/.gitignore000064400000000000000000000000571046102023000142310ustar 00000000000000*~ *.swp target/ Cargo.lock .idea/ img_hash.imlimage_hasher-2.0.0/CHANGELOG.md000064400000000000000000000017741046102023000140610ustar 00000000000000# 2.0.0 - 11.03.2024 - Update to image 0.25 - Minimal Rust version set to 1.67.1 - Added median hash - [#13](https://github.com/qarmin/img_hash/pull/13) - Added bit ordering - [#14](https://github.com/qarmin/img_hash/pull/14) - Added into_inner() function on ImageHash object - [#11](https://github.com/qarmin/img_hash/pull/11) # 1.2.0 - 02.06.2023 - Update base64 to 0.21.0 - [#8](https://github.com/qarmin/img_hash/pull/8) # 1.1.2 - 26.11.2022 - Revert base64 version to 0.13.1 - Set minimal Rust version to 1.61 # 1.1.1 - 20.11.2022 - Rustdct fix, criterion/benchmark update, tests fixes - [#3](https://github.com/qarmin/img_hash/pull/3) # 1.1 - 20.10.2022 - Added CI, formatted code - [#2](https://github.com/qarmin/img_hash/pull/2) - Update to rustdct 0.7 - [#1](https://github.com/qarmin/img_hash/pull/1) # 1.0 - 02.04.2022 - First version without any logic changes with updated dependencies - [47e](47e4e243f79e170291580e2fb914b53b749cead6) - Some clippy changes - [8da](8da30ed6e46697fa1ab99a664b579e51e62dc6ae) image_hasher-2.0.0/Cargo.lock0000644000000472200000000000100114270ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" [[package]] name = "bumpalo" version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytemuck" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" [[package]] name = "byteorder" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" [[package]] name = "ciborium-ll" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "criterion" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "is-terminal", "itertools", "num-traits", "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "either" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "getrandom" version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "half" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hermit-abi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "image" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9b4f005360d32e9325029b38ba47ebd7a56f3316df09249368939562d518645" dependencies = [ "bytemuck", "byteorder", "num-traits", ] [[package]] name = "image_hasher" version = "2.0.0" dependencies = [ "base64", "criterion", "image", "rand", "rustdct", "serde", "transpose", ] [[package]] name = "is-terminal" version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "log" version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "num-complex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" dependencies = [ "num-traits", ] [[package]] name = "num-integer" version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", ] [[package]] name = "num-traits" version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "plotters" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" [[package]] name = "plotters-svg" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" dependencies = [ "plotters-backend", ] [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "primal-check" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9df7f93fd637f083201473dab4fee2db4c429d32e55e3299980ab3957ab916a0" dependencies = [ "num-integer", ] [[package]] name = "proc-macro2" version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "rayon" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustdct" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b61555105d6a9bf98797c063c362a1d24ed8ab0431655e38f1cf51e52089551" dependencies = [ "rustfft", ] [[package]] name = "rustfft" version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17d4f6cbdb180c9f4b2a26bbf01c4e647f1e1dea22fe8eb9db54198b32f9434" dependencies = [ "num-complex", "num-integer", "num-traits", "primal-check", "strength_reduce", "transpose", "version_check", ] [[package]] name = "ryu" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "strength_reduce" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" [[package]] name = "syn" version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "transpose" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6522d49d03727ffb138ae4cbc1283d3774f0d10aa7f9bf52e6784c45daf9b23" dependencies = [ "num-integer", "strength_reduce", ] [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" image_hasher-2.0.0/Cargo.toml0000644000000027510000000000100114520ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.67.1" name = "image_hasher" version = "2.0.0" authors = [ "Rafał Mikrut ", "Austin Bonander ", ] description = "A simple library that provides perceptual hashing and difference calculation for images." documentation = "http://docs.rs/image_hasher" readme = "README.md" keywords = [ "image", "hash", "perceptual", "difference", ] license = "MIT OR Apache-2.0" repository = "http://github.com/qarmin/img_hash" [[bin]] name = "hash_image" [[bench]] name = "byte_to_float" harness = false [[bench]] name = "hash" harness = false [dependencies.base64] version = "0.22.0" [dependencies.image] version = ">=0.25,<0.26" default-features = false [dependencies.rustdct] version = "0.7" [dependencies.serde] version = "1.0" features = ["derive"] [dependencies.transpose] version = "0.2" [dev-dependencies.criterion] version = "0.5" [dev-dependencies.rand] version = "0.8" features = ["small_rng"] [features] nightly = [] image_hasher-2.0.0/Cargo.toml.orig000064400000000000000000000016331046102023000151310ustar 00000000000000[package] name = "image_hasher" version = "2.0.0" rust-version = "1.67.1" edition = "2021" license = "MIT OR Apache-2.0" authors = ["Rafał Mikrut ", "Austin Bonander "] description = "A simple library that provides perceptual hashing and difference calculation for images." documentation = "http://docs.rs/image_hasher" keywords = ["image", "hash", "perceptual", "difference"] repository = "http://github.com/qarmin/img_hash" readme = "README.md" [features] nightly = [] [dependencies] base64 = "0.22.0" image = { version = ">=0.25,<0.26", default-features = false } rustdct = "0.7" serde = { version = "1.0", features = ["derive"] } transpose = "0.2" [dev-dependencies] criterion = "0.5" rand = { version = "0.8", features = ["small_rng"] } [[bench]] name = "byte_to_float" harness = false [[bench]] name = "hash" harness = false [[bin]] name = "hash_image" image_hasher-2.0.0/LICENSE-APACHE000064400000000000000000000251371046102023000141730ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. image_hasher-2.0.0/LICENSE-MIT000064400000000000000000000020701046102023000136720ustar 00000000000000Copyright (c) 2015-2017 The `img_hash` Crate Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. image_hasher-2.0.0/README.md000064400000000000000000000041341046102023000135200ustar 00000000000000# Image Hasher A library for getting perceptual hash values of images. Thanks to Dr. Neal Krawetz for the outlines of the Mean (aHash), Gradient (dHash), and DCT (pHash) perceptual hash algorithms: http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html (Accessed August 2014) Also provides an implementation of [the Blockhash.io algorithm](http://blockhash.io). This crate can operate directly on buffers from the [PistonDevelopers/image][1] crate. [1]: https://github.com/PistonDevelopers/image This is fork of [img_hash](https://github.com/abonander/img_hash) library, but with updated dependencies. I am not familiar with this library, so if you have a need/willingness to develop it, I can add you as a co-maintainer. Usage ===== [Documentation](https://docs.rs/img_hash) Add `image_hasher` to your `Cargo.toml`: ``` image_hasher = "2.0.0" ``` Example program: ```rust use image_hasher::{HasherConfig, HashAlg}; fn main() { let image1 = image::open("image1.png").unwrap(); let image2 = image::open("image2.png").unwrap(); let hasher = HasherConfig::new().to_hasher(); let hash1 = hasher.hash_image(&image1); let hash2 = hasher.hash_image(&image2); println!("Image1 hash: {}", hash1.to_base64()); println!("Image2 hash: {}", hash2.to_base64()); println!("Hamming Distance: {}", hash1.dist(&hash2)); } ``` Benchmarking ============ In order to build and test on Rust stable, the benchmarks have to be placed behind a feature gate. If you have Rust nightly installed and want to run benchmarks, use the following command: ``` cargo +nightly bench ``` ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. image_hasher-2.0.0/benches/byte_to_float.rs000064400000000000000000000130411046102023000170450ustar 00000000000000//! Evaluating converting from u8 [0, 255] to f32 [0, 1] using either naive conversion or a lookup //! table. //! //! I found that the naive version actually optimizes better because it can be vectorized while //! the lookup apparently can't. The difference is even more striking with AVX2 which provides even //! wider SIMD lanes for the conversion. //! //! The difference is not explained by bounds checking because the lookup doesn't appear to //! emit any: https://godbolt.org/z/sutBRr //! In fact, `.get_unchecked()` seems to make it perform *worse*. Try it. #[macro_use] extern crate criterion; use criterion::{BenchmarkId, Criterion, Throughput}; static LOOKUP: [f32; 256] = [ 0.0, 0.003921569, 0.007843138, 0.011764706, 0.015686275, 0.019607844, 0.023529412, 0.02745098, 0.03137255, 0.03529412, 0.039215688, 0.043137256, 0.047058824, 0.050980393, 0.05490196, 0.05882353, 0.0627451, 0.06666667, 0.07058824, 0.07450981, 0.078431375, 0.08235294, 0.08627451, 0.09019608, 0.09411765, 0.09803922, 0.101960786, 0.105882354, 0.10980392, 0.11372549, 0.11764706, 0.12156863, 0.1254902, 0.12941177, 0.13333334, 0.13725491, 0.14117648, 0.14509805, 0.14901961, 0.15294118, 0.15686275, 0.16078432, 0.16470589, 0.16862746, 0.17254902, 0.1764706, 0.18039216, 0.18431373, 0.1882353, 0.19215687, 0.19607843, 0.2, 0.20392157, 0.20784314, 0.21176471, 0.21568628, 0.21960784, 0.22352941, 0.22745098, 0.23137255, 0.23529412, 0.23921569, 0.24313726, 0.24705882, 0.2509804, 0.25490198, 0.25882354, 0.2627451, 0.26666668, 0.27058825, 0.27450982, 0.2784314, 0.28235295, 0.28627452, 0.2901961, 0.29411766, 0.29803923, 0.3019608, 0.30588236, 0.30980393, 0.3137255, 0.31764707, 0.32156864, 0.3254902, 0.32941177, 0.33333334, 0.3372549, 0.34117648, 0.34509805, 0.34901962, 0.3529412, 0.35686275, 0.36078432, 0.3647059, 0.36862746, 0.37254903, 0.3764706, 0.38039216, 0.38431373, 0.3882353, 0.39215687, 0.39607844, 0.4, 0.40392157, 0.40784314, 0.4117647, 0.41568628, 0.41960785, 0.42352942, 0.42745098, 0.43137255, 0.43529412, 0.4392157, 0.44313726, 0.44705883, 0.4509804, 0.45490196, 0.45882353, 0.4627451, 0.46666667, 0.47058824, 0.4745098, 0.47843137, 0.48235294, 0.4862745, 0.49019608, 0.49411765, 0.49803922, 0.5019608, 0.5058824, 0.50980395, 0.5137255, 0.5176471, 0.52156866, 0.5254902, 0.5294118, 0.53333336, 0.5372549, 0.5411765, 0.54509807, 0.54901963, 0.5529412, 0.5568628, 0.56078434, 0.5647059, 0.5686275, 0.57254905, 0.5764706, 0.5803922, 0.58431375, 0.5882353, 0.5921569, 0.59607846, 0.6, 0.6039216, 0.60784316, 0.6117647, 0.6156863, 0.61960787, 0.62352943, 0.627451, 0.6313726, 0.63529414, 0.6392157, 0.6431373, 0.64705884, 0.6509804, 0.654902, 0.65882355, 0.6627451, 0.6666667, 0.67058825, 0.6745098, 0.6784314, 0.68235296, 0.6862745, 0.6901961, 0.69411767, 0.69803923, 0.7019608, 0.7058824, 0.70980394, 0.7137255, 0.7176471, 0.72156864, 0.7254902, 0.7294118, 0.73333335, 0.7372549, 0.7411765, 0.74509805, 0.7490196, 0.7529412, 0.75686276, 0.7607843, 0.7647059, 0.76862746, 0.77254903, 0.7764706, 0.78039217, 0.78431374, 0.7882353, 0.7921569, 0.79607844, 0.8, 0.8039216, 0.80784315, 0.8117647, 0.8156863, 0.81960785, 0.8235294, 0.827451, 0.83137256, 0.8352941, 0.8392157, 0.84313726, 0.84705883, 0.8509804, 0.85490197, 0.85882354, 0.8627451, 0.8666667, 0.87058824, 0.8745098, 0.8784314, 0.88235295, 0.8862745, 0.8901961, 0.89411765, 0.8980392, 0.9019608, 0.90588236, 0.9098039, 0.9137255, 0.91764706, 0.92156863, 0.9254902, 0.92941177, 0.93333334, 0.9372549, 0.9411765, 0.94509804, 0.9490196, 0.9529412, 0.95686275, 0.9607843, 0.9647059, 0.96862745, 0.972549, 0.9764706, 0.98039216, 0.9843137, 0.9882353, 0.99215686, 0.99607843, 1.0, ]; fn bench_functions(c: &mut Criterion) { let sizes = [64usize, 128, 256, 384, 512, 768, 1024]; let mut group = c.benchmark_group("byte to float conversion"); for size in sizes { group.throughput(Throughput::Bytes(size as _)); group.bench_function(BenchmarkId::new("lookup", size), |b| { let vals: Vec = (0..=255).cycle().take(size).collect(); b.iter_with_setup( || Vec::with_capacity(size), move |mut out: Vec| out.extend(vals.iter().map(|&x| LOOKUP[x as usize])), ); }); group.bench_function(BenchmarkId::new("naive", size), |b| { let vals: Vec = (0..=255).cycle().take(size).collect(); b.iter_with_setup( || Vec::with_capacity(size), |mut out| out.extend(vals.iter().map(|&x| x as f32 / 255.)), ); }); } group.finish() } criterion_group!(benches, bench_functions); criterion_main!(benches); image_hasher-2.0.0/benches/hash.rs000064400000000000000000000031051046102023000151360ustar 00000000000000#[macro_use] extern crate criterion; use criterion::{Bencher, BenchmarkId, Criterion, Throughput}; use image_hasher::{HashAlg, HasherConfig, ImageHash}; use image::{ImageBuffer, Rgba}; use rand::{rngs::SmallRng, RngCore, SeedableRng}; type RgbaBuf = ImageBuffer, Vec>; fn gen_test_img(width: u32, height: u32) -> RgbaBuf { let len = (width * height * 4) as usize; let mut buf = Vec::with_capacity(len); unsafe { buf.set_len(len); } // We immediately fill the buffer. let mut rng = SmallRng::seed_from_u64(0xc0ffee); rng.fill_bytes(&mut *buf); ImageBuffer::from_raw(width, height, buf).unwrap() } fn bench_functions(c: &mut Criterion) { const BENCH_HASH_SIZE: u32 = 8; const TEST_IMAGE_SIZE: u32 = 64; let mut group = c.benchmark_group("hash"); let img = gen_test_img(TEST_IMAGE_SIZE, TEST_IMAGE_SIZE); for alg in [ HashAlg::Mean, HashAlg::Median, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Blockhash, ] { group.bench_with_input( BenchmarkId::new("hash", format!("{:?}", alg)), &img, |b, img| { let hasher = HasherConfig::new() .hash_size(BENCH_HASH_SIZE, BENCH_HASH_SIZE) .hash_alg(alg) .to_hasher(); b.iter(|| { hasher.hash_image(img); }); }, ); } group.finish(); } criterion_group!(benches, bench_functions); criterion_main!(benches); image_hasher-2.0.0/src/alg/blockhash.rs000064400000000000000000000164471046102023000161110ustar 00000000000000#![allow(clippy::redundant_closure_call)] use std::cmp::Ordering; use std::mem; use std::ops::AddAssign; // Implementation adapted from Python version: // https://github.com/commonsmachinery/blockhash-python/blob/e8b009d/blockhash.py // Main site: http://blockhash.io use image::{GenericImageView, Pixel}; use crate::BitSet; use crate::{HashBytes, Image}; use crate::BitOrder; const FLOAT_EQ_MARGIN: f32 = 0.001; pub fn blockhash( img: &I, width: u32, height: u32, bit_order: BitOrder, ) -> B { assert_eq!(width % 4, 0, "width must be multiple of 4"); assert_eq!(height % 4, 0, "height must be multiple of 4"); let (iwidth, iheight) = img.dimensions(); // Skip the floating point math if it's unnecessary if iwidth % width == 0 && iheight % height == 0 { blockhash_fast(img, width, height, bit_order) } else { blockhash_slow(img, width, height, bit_order) } } macro_rules! gen_hash { ($imgty:ty, $valty:ty, $blocks: expr, $width:expr, $block_width:expr, $block_height:expr, $eq_fn:expr, $bit_order:expr) => {{ #[allow(deprecated)] // deprecated as of 0.22 let channel_count = <<$imgty as GenericImageView>::Pixel as Pixel>::CHANNEL_COUNT as u32; let group_len = ($width * 4) as usize; let block_area = $block_width * $block_height; let cmp_factor = match channel_count { 3 | 4 => 255u32 as $valty * 3u32 as $valty, 2 | 1 => 255u32 as $valty, _ => panic!("Unrecognized channel count from Image: {}", channel_count), } * block_area / (2u32 as $valty); let medians: Vec<$valty> = $blocks.chunks(group_len).map(get_median).collect(); BitSet::from_bools( $blocks .chunks(group_len) .zip(medians) .flat_map(|(blocks, median)| { blocks.iter().map(move |&block| { block > median || ($eq_fn(block, median) && median > cmp_factor) }) }), $bit_order, ) }}; } fn block_adder<'a, T: AddAssign + 'a>( blocks: &'a mut [T], width: u32, ) -> impl FnMut(u32, u32, T) + 'a { move |x, y, add| (blocks[(y as usize) * (width as usize) + (x as usize)] += add) } fn blockhash_slow( img: &I, hwidth: u32, hheight: u32, bit_order: BitOrder, ) -> B { let mut blocks = vec![0f32; (hwidth * hheight) as usize]; let (iwidth, iheight) = img.dimensions(); // Block dimensions, in pixels let (block_width, block_height) = ( iwidth as f32 / hwidth as f32, iheight as f32 / hheight as f32, ); img.foreach_pixel8(|x, y, px| { let mut add_to_block = block_adder(&mut blocks, hwidth); let px_sum = sum_px(px) as f32; let (x, y) = (x as f32, y as f32); let block_x = x / block_width; let block_y = y / block_height; let x_mod = x + 1. % block_width; let y_mod = y + 1. % block_height; // terminology is mostly arbitrary as long as we're consistent // if `x` evenly divides `block_height`, this weight will be 0 // so we don't double the sum as `block_top` will equal `block_bottom` let weight_left = x_mod.fract(); let weight_right = 1. - weight_left; let weight_top = y_mod.fract(); let weight_bottom = 1. - weight_top; let block_left = block_x.floor() as u32; let block_top = block_y.floor() as u32; let block_right = if x_mod.trunc() == 0. { block_x.ceil() as u32 } else { block_left }; let block_bottom = if y_mod.trunc() == 0. { block_y.ceil() as u32 } else { block_top }; add_to_block(block_left, block_top, px_sum * weight_left * weight_top); add_to_block( block_left, block_bottom, px_sum * weight_left * weight_bottom, ); add_to_block(block_right, block_top, px_sum * weight_right * weight_top); add_to_block( block_right, block_bottom, px_sum * weight_right * weight_bottom, ); }); gen_hash!( I, f32, blocks, hwidth, block_width, block_height, |l: f32, r: f32| (l - r).abs() < FLOAT_EQ_MARGIN, bit_order ) } fn blockhash_fast( img: &I, hwidth: u32, hheight: u32, bit_order: BitOrder, ) -> B { let mut blocks = vec![0u32; (hwidth * hheight) as usize]; let (iwidth, iheight) = img.dimensions(); let (block_width, block_height) = (iwidth / hwidth, iheight / hheight); img.foreach_pixel8(|x, y, px| { let mut add_to_block = block_adder(&mut blocks, hwidth); let px_sum = sum_px(px); let block_x = x / block_width; let block_y = y / block_height; add_to_block(block_x, block_y, px_sum); }); gen_hash!( I, u32, blocks, hwidth, block_width, block_height, |l, r| l == r, bit_order ) } #[inline(always)] fn sum_px(chans: &[u8]) -> u32 { // Branch prediction should eliminate the match after a few iterations match chans.len() { 4 => { if chans[3] == 0 { 255 * 3 } else { sum_px(&chans[..3]) } } 3 => chans.iter().map(|&x| x as u32).sum(), 2 => { if chans[1] == 0 { 255 } else { chans[0] as u32 } } 1 => chans[0] as u32, channels => panic!("Unsupported channel count in image: {channels}"), } } fn get_median(data: &[T]) -> T { let mut scratch = data.to_owned(); let median = scratch.len() / 2; *qselect_inplace(&mut scratch, median) } const SORT_THRESH: usize = 8; fn qselect_inplace(data: &mut [T], k: usize) -> &mut T { let len = data.len(); assert!( k < len, "Called qselect_inplace with k = {k} and data length: {len}", ); if len < SORT_THRESH { data.sort_by(|left, right| left.partial_cmp(right).unwrap_or(Ordering::Less)); return &mut data[k]; } let pivot_idx = partition(data); match k.cmp(&pivot_idx) { Ordering::Less => qselect_inplace(&mut data[..pivot_idx], k), Ordering::Equal => &mut data[pivot_idx], Ordering::Greater => qselect_inplace(&mut data[pivot_idx + 1..], k - pivot_idx - 1), } } fn partition(data: &mut [T]) -> usize { let len = data.len(); let pivot_idx = { let first = (&data[0], 0); let mid = (&data[len / 2], len / 2); let last = (&data[len - 1], len - 1); median_of_3(&first, &mid, &last).1 }; data.swap(pivot_idx, len - 1); let mut curr = 0; for i in 0..len - 1 { if data[i] < data[len - 1] { data.swap(i, curr); curr += 1; } } data.swap(curr, len - 1); curr } fn median_of_3(mut x: T, mut y: T, mut z: T) -> T { if x > y { mem::swap(&mut x, &mut y); } if x > z { mem::swap(&mut x, &mut z); } if x > z { mem::swap(&mut y, &mut z); } y } image_hasher-2.0.0/src/alg/mod.rs000064400000000000000000000223011046102023000147140ustar 00000000000000#![allow(clippy::needless_lifetimes)] use crate::CowImage::*; use crate::HashVals::*; use crate::{BitSet, HashCtxt, Image}; use self::HashAlg::*; mod blockhash; /// Hash algorithms implemented by this crate. /// /// Implemented primarily based on the high-level descriptions on the blog Hacker Factor /// written by Dr. Neal Krawetz: http://www.hackerfactor.com/ /// /// Note that `hash_width` and `hash_height` in these docs refer to the parameters of /// [`HasherConfig::hash_size()`](struct.HasherConfig.html#method.hash_size). /// /// ### Choosing an Algorithm /// Each algorithm has different performance characteristics #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum HashAlg { /// The Mean hashing algorithm. /// /// The image is converted to grayscale, scaled down to `hash_width x hash_height`, /// the mean pixel value is taken, and then the hash bits are generated by comparing /// the pixels of the descaled image to the mean. /// /// This is the most basic hash algorithm supported, resistant only to changes in /// resolution, aspect ratio, and overall brightness. /// /// Further Reading: /// http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html Mean, /// The Median hashing algorithm. /// /// The image is converted to grayscale, scaled down to `hash_width x hash_height`, /// the median pixel value is taken, and then the hash bits are generated by comparing /// the pixels of the descaled image to the mean. /// /// Median hashing in combiantion with preproc_dct is the basis for pHash Median, /// The Gradient hashing algorithm. /// /// The image is converted to grayscale, scaled down to `(hash_width + 1) x hash_height`, /// and then in row-major order the pixels are compared with each other, setting bits /// in the hash for each comparison. The extra pixel is needed to have `hash_width` comparisons /// per row. /// /// This hash algorithm is as fast or faster than Mean (because it only traverses the /// hash data once) and is more resistant to changes than Mean. /// /// Further Reading: /// http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html Gradient, /// The Vertical-Gradient hashing algorithm. /// /// Equivalent to [`Gradient`](#variant.Gradient) but operating on the columns of the image /// instead of the rows. VertGradient, /// The Double-Gradient hashing algorithm. /// /// An advanced version of [`Gradient`](#variant.Gradient); /// resizes the grayscaled image to `(width / 2 + 1) x (height / 2 + 1)` and compares columns /// in addition to rows. /// /// This algorithm is slightly slower than `Gradient` (resizing the image dwarfs /// the hash time in most cases) but the extra comparison direction may improve results (though /// you might want to consider increasing /// [`hash_size`](struct.HasherConfig.html#method.hash_size) /// to accommodate the extra comparisons). DoubleGradient, /// The [Blockhash.io](https://blockhash.io) algorithm. /// /// Compared to the other algorithms, this does not require any preprocessing steps and so /// may be significantly faster at the cost of some resilience. /// /// The algorithm is described in a high level here: /// https://github.com/commonsmachinery/blockhash-rfc/blob/master/main.md Blockhash, } /// The bit order used when forming the bit string of the hash #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy)] pub enum BitOrder { /// Least Significant Bit First. This turns a filter output of 1000 000 into the hash 0x01 /// /// This is the traditional mode of this library LsbFirst, /// Most Significant Bit First. This turns a filter output of 1000 000 into the hash 0x80 /// /// This mode is popular among other libraries, and thus useful to generate hashes compatible with them MsbFirst, } fn next_multiple_of_2(x: u32) -> u32 { (x + 1) & !1 } fn next_multiple_of_4(x: u32) -> u32 { (x + 3) & !3 } impl HashAlg { pub(crate) fn hash_image(&self, ctxt: &HashCtxt, image: &I) -> B where I: Image, B: BitSet, { let post_gauss = ctxt.gauss_preproc(image); let HashCtxt { width, height, bit_order, .. } = *ctxt; if *self == Blockhash { return match post_gauss { Borrowed(img) => blockhash::blockhash(img, width, height, bit_order), Owned(img) => blockhash::blockhash(&img, width, height, bit_order), }; } let grayscale = post_gauss.to_grayscale(); let (resize_width, resize_height) = self.resize_dimensions(width, height); let hash_vals = ctxt.calc_hash_vals(&grayscale, resize_width, resize_height); let rowstride = resize_width as usize; match (*self, hash_vals) { (Mean, Floats(ref floats)) => B::from_bools(mean_hash_f32(floats), bit_order), (Mean, Bytes(ref bytes)) => B::from_bools(mean_hash_u8(bytes), bit_order), (Gradient, Floats(ref floats)) => { B::from_bools(gradient_hash(floats, rowstride), bit_order) } (Gradient, Bytes(ref bytes)) => { B::from_bools(gradient_hash(bytes, rowstride), bit_order) } (VertGradient, Floats(ref floats)) => { B::from_bools(vert_gradient_hash(floats, rowstride), bit_order) } (VertGradient, Bytes(ref bytes)) => { B::from_bools(vert_gradient_hash(bytes, rowstride), bit_order) } (DoubleGradient, Floats(ref floats)) => { B::from_bools(double_gradient_hash(floats, rowstride), bit_order) } (DoubleGradient, Bytes(ref bytes)) => { B::from_bools(double_gradient_hash(bytes, rowstride), bit_order) } (Median, Floats(ref floats)) => B::from_bools(median_hash_f32(floats), bit_order), (Median, Bytes(ref bytes)) => B::from_bools(median_hash_u8(bytes), bit_order), (Blockhash, _) => unreachable!(), } } pub(crate) fn round_hash_size(&self, width: u32, height: u32) -> (u32, u32) { match *self { DoubleGradient => (next_multiple_of_2(width), next_multiple_of_2(height)), Blockhash => (next_multiple_of_4(width), next_multiple_of_4(height)), _ => (width, height), } } pub(crate) fn resize_dimensions(&self, width: u32, height: u32) -> (u32, u32) { match *self { Mean => (width, height), Median => (width, height), Blockhash => panic!("Blockhash algorithm does not resize"), Gradient => (width + 1, height), VertGradient => (width, height + 1), DoubleGradient => (width / 2 + 1, height / 2 + 1), } } } fn mean_hash_u8<'a>(luma: &'a [u8]) -> impl Iterator + 'a { let mean = (luma.iter().map(|&l| l as u32).sum::() / luma.len() as u32) as u8; luma.iter().map(move |&x| x >= mean) } fn mean_hash_f32<'a>(luma: &'a [f32]) -> impl Iterator + 'a { let mean = luma.iter().sum::() / luma.len() as f32; luma.iter().map(move |&x| x >= mean) } fn median_f32(numbers: &[f32]) -> f32 { let mut sorted = numbers.to_owned(); sorted.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); let mid = sorted.len() / 2; if sorted.len() % 2 == 0 { let a = sorted[mid - 1]; let b = sorted[mid]; (a + b) / 2.0 } else { sorted[mid] } } fn median_u8(numbers: &[u8]) -> u8 { let mut sorted = numbers.to_owned(); sorted.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); let mid = sorted.len() / 2; if sorted.len() % 2 == 0 { let a = sorted[mid - 1]; let b = sorted[mid]; (a + b) / 2 } else { sorted[mid] } } fn median_hash_u8<'a>(luma: &'a [u8]) -> impl Iterator + 'a { let med = median_u8(luma); luma.iter().map(move |&x| x >= med) } fn median_hash_f32<'a>(luma: &'a [f32]) -> impl Iterator + 'a { let med = median_f32(luma); luma.iter().map(move |&x| x >= med) } /// The guts of the gradient hash separated so we can reuse them fn gradient_hash_impl(luma: I) -> impl Iterator where I: IntoIterator + Clone, ::Item: PartialOrd, { luma.clone() .into_iter() .skip(1) .zip(luma) .map(|(this, last)| last < this) } fn gradient_hash<'a, T: PartialOrd>( luma: &'a [T], rowstride: usize, ) -> impl Iterator + 'a { luma.chunks(rowstride).flat_map(gradient_hash_impl) } fn vert_gradient_hash<'a, T: PartialOrd>( luma: &'a [T], rowstride: usize, ) -> impl Iterator + 'a { (0..rowstride) .map(move |col_start| luma[col_start..].iter().step_by(rowstride)) .flat_map(gradient_hash_impl) } fn double_gradient_hash<'a, T: PartialOrd>( luma: &'a [T], rowstride: usize, ) -> impl Iterator + 'a { gradient_hash(luma, rowstride).chain(vert_gradient_hash(luma, rowstride)) } image_hasher-2.0.0/src/bin/hash_image.rs000064400000000000000000000012021046102023000162240ustar 00000000000000//! Hash an image and print the Base64 value use std::env; use image_hasher::HasherConfig; fn main() -> Result<(), String> { let args = env::args().collect::>(); assert_eq!(args.len(), 2); let image = image::open(&args[1]).map_err(|e| format!("failed to open {}: {}", &args[1], e))?; let hash = HasherConfig::new() .hash_size(8, 8) .to_hasher() .hash_image(&image); #[allow(clippy::format_collect)] let hash_str = hash .as_bytes() .iter() .map(|b| format!("{b:02x}")) .collect::(); println!("{}: {}", &args[1], hash_str); Ok(()) } image_hasher-2.0.0/src/dct.rs000064400000000000000000000101041046102023000141420ustar 00000000000000use std::sync::Arc; use rustdct::{DctPlanner, TransformType2And3}; use transpose::transpose_inplace; pub const SIZE_MULTIPLIER: u32 = 2; pub const SIZE_MULTIPLIER_U: usize = SIZE_MULTIPLIER as usize; pub struct DctCtxt { row_dct: Arc>, col_dct: Arc>, width: usize, height: usize, } impl DctCtxt { pub fn new(width: u32, height: u32) -> Self { let mut planner = DctPlanner::new(); let width = width as usize * SIZE_MULTIPLIER_U; let height = height as usize * SIZE_MULTIPLIER_U; DctCtxt { row_dct: planner.plan_dct2(width), col_dct: planner.plan_dct2(height), width, height, } } pub fn width(&self) -> u32 { self.width as u32 } pub fn height(&self) -> u32 { self.height as u32 } /// Perform a 2D DCT on a 1D-packed vector with a given `width x height`. /// /// Assumes `packed_2d` is double-length for scratch space. Returns the vector truncated to /// `width * height`. /// /// ### Panics /// If `self.width * self.height * 2 != packed_2d.len()` pub fn dct_2d(&self, mut packed_2d: Vec) -> Vec { let Self { ref row_dct, ref col_dct, width, height, } = *self; let trunc_len = width * height; assert_eq!(trunc_len + self.required_scratch(), packed_2d.len()); { let (packed_2d, scratch) = packed_2d.split_at_mut(trunc_len); for row_in in packed_2d.chunks_mut(width) { row_dct.process_dct2_with_scratch(row_in, scratch); } transpose_inplace( packed_2d, &mut scratch[..std::cmp::max(width, height)], width, height, ); for row_in in packed_2d.chunks_mut(height) { col_dct.process_dct2_with_scratch(row_in, scratch); } transpose_inplace( packed_2d, &mut scratch[..std::cmp::max(width, height)], width, height, ); } packed_2d.truncate(trunc_len); packed_2d } pub fn crop_2d(&self, packed: Vec) -> Vec { crop_2d_dct(packed, self.width) } pub fn required_scratch(&self) -> usize { let transpose_scratch = std::cmp::max(self.width, self.height); let dct_scratch = std::cmp::max( self.row_dct.get_scratch_len(), self.col_dct.get_scratch_len(), ); std::cmp::max(transpose_scratch, dct_scratch) } } /// Crop the values off a 1D-packed 2D DCT. /// /// Returns `packed` truncated to the premultiplied size, as determined by `rowstride` /// /// Generic for easier testing fn crop_2d_dct(mut packed: Vec, rowstride: usize) -> Vec { // assert that the rowstride was previously multiplied by SIZE_MULTIPLIER assert_eq!(rowstride % SIZE_MULTIPLIER_U, 0); assert!( rowstride / SIZE_MULTIPLIER_U > 0, "rowstride cannot be cropped: {rowstride}", ); let new_rowstride = rowstride / SIZE_MULTIPLIER_U; for new_row in 0..packed.len() / (rowstride * SIZE_MULTIPLIER_U) { let (dest, src) = packed.split_at_mut(new_row * new_rowstride + rowstride); let dest_start = dest.len() - new_rowstride; let src_start = new_rowstride * new_row; let src_end = src_start + new_rowstride; dest[dest_start..].copy_from_slice(&src[src_start..src_end]); } let new_len = packed.len() / (SIZE_MULTIPLIER_U * SIZE_MULTIPLIER_U); packed.truncate(new_len); packed } #[test] fn test_crop_2d_dct() { let packed: Vec = (0..64).collect(); assert_eq!( crop_2d_dct(packed.clone(), 8), [ 0, 1, 2, 3, // 4, 5, 6, 7 8, 9, 10, 11, // 12, 13, 14, 15 16, 17, 18, 19, // 20, 21, 22, 23, 24, 25, 26, 27, // 28, 29, 30, 31, // 32 .. 64 ] ); } #[test] fn test_transpose() {} image_hasher-2.0.0/src/lib.rs000064400000000000000000000565411046102023000141550ustar 00000000000000//! A crate that provides several perceptual hashing algorithms for images. //! Supports images opened with the [image] crate from Piston. //! //! ```rust,no_run //! //! use image_hasher::{HasherConfig, HashAlg}; //! //! let image1 = image::open("image1.png").unwrap(); //! let image2 = image::open("image2.png").unwrap(); //! //! let hasher = HasherConfig::new().to_hasher(); //! //! let hash1 = hasher.hash_image(&image1); //! let hash2 = hasher.hash_image(&image2); //! //! println!("Image1 hash: {}", hash1.to_base64()); //! println!("Image2 hash: {}", hash2.to_base64()); //! //! println!("Hamming Distance: {}", hash1.dist(&hash2)); //! ``` //! [image]: https://github.com/PistonDevelopers/image #![deny(missing_docs)] #![cfg_attr(feature = "nightly", feature(specialization))] #[macro_use] extern crate serde; use std::borrow::Cow; use std::fmt; use std::marker::PhantomData; use base64::Engine; use image::imageops; pub use image::imageops::FilterType; use image::GrayImage; use serde::{Deserialize, Serialize}; pub use alg::BitOrder; pub use alg::HashAlg; use dct::DctCtxt; pub(crate) use traits::BitSet; pub use traits::{DiffImage, HashBytes, Image}; mod dct; mod alg; mod traits; /// **Start here**. Configuration builder for [`Hasher`](::Hasher). /// /// Playing with the various options on this struct allows you to tune the performance of image /// hashing to your needs. /// /// Sane, reasonably fast defaults are provided by the [`::new()`](#method.new) constructor. If /// you just want to start hashing images and don't care about the details, it's as simple as: /// /// ```rust /// use image_hasher::HasherConfig; /// /// let hasher = HasherConfig::new().to_hasher(); /// // hasher.hash_image(image); /// ``` /// /// # Configuration Options /// The hash API is highly configurable to tune both performance characteristics and hash /// resilience. /// /// ### Hash Size /// Setter: [`.hash_size()`](#method.hash_size) /// /// Dimensions of the final hash, as width x height, in bits. A hash size of `8, 8` produces an /// 8 x 8 bit (8 byte) hash. Larger hash sizes take more time to compute as well as more memory, /// but aren't necessarily better for comparing images. The best hash size depends on both /// the [hash algorithm](#hash-algorithm) and the input dataset. If your images are mostly /// wide aspect ratio (landscape) then a larger width and a smaller height hash size may be /// preferable. Optimal values can really only be discovered empirically though. /// /// (As the author experiments, suggested values will be added here for various algorithms.) /// /// ### Hash Algorithm /// Setter: [`.hash_alg()`](#method.hash_alg) /// Definition: [`HashAlg`](enum.HashAlg.html) /// /// Multiple methods of calculating image hashes are provided in this crate under the `HashAlg` /// enum. Each algorithm is different but they all produce the same size hashes as governed by /// `hash_size`. /// /// ### Hash Bytes Container / `B` Type Param /// Use [`with_bytes_type::()`](#method.with_bytes_type) instead of `new()` to customize. /// /// This hash API allows you to specify the bytes container type for generated hashes. The default /// allows for any arbitrary hash size (see above) but requires heap-allocation. Instead, you /// can select an array type which allows hashes to be allocated inline, but requires consideration /// of the possible sizes of hash you want to generate so you don't waste memory. /// /// Another advantage of using a constant-sized hash type is that the compiler may be able to /// produce more optimal code for generating and comparing hashes. /// /// ```rust /// # use image_hasher::*; /// /// // Use default container type, good for any hash size /// let config = HasherConfig::new(); /// /// /// Inline hash container that exactly fits the default hash size /// let config = HasherConfig::with_bytes_type::<[u8; 8]>(); /// ``` /// #[derive(Serialize, Deserialize)] pub struct HasherConfig> { width: u32, height: u32, gauss_sigmas: Option<[f32; 2]>, #[serde(with = "SerdeFilterType")] resize_filter: FilterType, dct: bool, hash_alg: HashAlg, bit_order: BitOrder, _bytes_type: PhantomData, } impl HasherConfig> { /// Construct a new hasher config with sane, reasonably fast defaults. /// /// A default hash container type is provided as a default type parameter which is guaranteed /// to fit any hash size. pub fn new() -> Self { Self::with_bytes_type() } /// Construct a new config with the selected [`HashBytes`](trait.HashBytes.html) impl. /// /// You may opt for an array type which allows inline allocation of hash data. /// /// ### Note /// The default hash size requires 64 bits / 8 bytes of storage. You can change this /// with [`.hash_size()`](#method.hash_size). pub fn with_bytes_type() -> HasherConfig { HasherConfig { width: 8, height: 8, gauss_sigmas: None, resize_filter: FilterType::Lanczos3, dct: false, hash_alg: HashAlg::Gradient, bit_order: BitOrder::LsbFirst, _bytes_type: PhantomData, } } } impl Default for HasherConfig> { fn default() -> Self { Self::new() } } impl HasherConfig { /// Set a new hash width and height; these can be the same. /// /// The number of bits in the resulting hash will be `width * height`. If you are using /// a fixed-size `HashBytes` type then you must ensure it can hold at least this many bits. /// You can check this with [`HashBytes::max_bits()`](#method.max_bits). /// /// ### Rounding Behavior /// Certain hash algorithms need to round this value to function properly: /// /// * [`DoubleGradient`](enum.HashAlg.html#variant.DoubleGradient) rounds to the next multiple of 2; /// * [`Blockhash`](enum.HashAlg.html#variant.Blockhash) rounds to the next multiple of 4. /// /// If the chosen values already satisfy these requirements then nothing is changed. /// /// ### Recommended Values /// The hash granularity increases with `width * height`, although there are diminishing /// returns for higher values. Start small. A good starting value to try is `8, 8`. /// /// When using DCT preprocessing having `width` and `height` be the same value will improve /// hashing performance as only one set of coefficients needs to be used. #[must_use] pub fn hash_size(self, width: u32, height: u32) -> Self { Self { width, height, ..self } } /// Set the filter used to resize images during hashing. /// /// Note when picking a filter that images are almost always reduced in size. /// Has no effect with the Blockhash algorithm as it does not resize. #[must_use] pub fn resize_filter(self, resize_filter: FilterType) -> Self { Self { resize_filter, ..self } } /// Set the algorithm used to generate hashes. /// /// Each algorithm has different performance characteristics. #[must_use] pub fn hash_alg(self, hash_alg: HashAlg) -> Self { Self { hash_alg, ..self } } /// Enable preprocessing with the Discrete Cosine Transform (DCT). /// /// Does nothing when used with [the Blockhash.io algorithm](HashAlg::Blockhash) /// which does not scale the image. /// (RFC: it would be possible to shoehorn a DCT into the Blockhash algorithm but it's /// not clear what benefits, if any, that would provide). /// /// After conversion to grayscale, the image is scaled down to `width * 2 x height * 2` /// and then the Discrete Cosine Transform is performed on the luminance values. The DCT /// essentially transforms the 2D image from the spatial domain with luminance values /// to a 2D frequency domain where the values are amplitudes of cosine waves. The resulting /// 2D matrix is then cropped to the low `width * height` corner and the /// configured hash algorithm is performed on that. /// /// In layman's terms, this essentially converts the image into a mathematical representation /// of the "broad strokes" of the data, which allows the subsequent hashing step to be more /// robust against changes that may otherwise produce different hashes, such as significant /// edits to portions of the image. /// /// However, on most machines this usually adds an additional 50-100% to the average hash time. /// /// This is a very similar process to JPEG compression, although the implementation is too /// different for this to be optimized specifically for JPEG encoded images. /// /// Further Reading: /// * http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html /// Krawetz describes a "pHash" algorithm which is equivalent to Mean + DCT preprocessing here. /// However there is nothing to say that DCT preprocessing cannot compose with other hash /// algorithms; Gradient + DCT might well perform better in some aspects. /// * https://en.wikipedia.org/wiki/Discrete_cosine_transform #[must_use] pub fn preproc_dct(self) -> Self { Self { dct: true, ..self } } /// Enable preprocessing with the Difference of Gaussians algorithm with default sigma values. /// /// Recommended only for use with [the Blockhash.io algorithm](enum.HashAlg#variant.Blockhash) /// as it significantly reduces entropy in the scaled down image for other algorithms. /// /// See [`Self::preproc_diff_gauss_sigmas()](#method.preproc_diff_gauss_sigmas) for more info. #[must_use] pub fn preproc_diff_gauss(self) -> Self { self.preproc_diff_gauss_sigmas(5.0, 10.0) } /// Enable preprocessing with the Difference of Gaussians algorithm with the given sigma values. /// /// Recommended only for use with [the Blockhash.io algorithm](enum.HashAlg#variant.Blockhash) /// as it significantly reduces entropy in the scaled down image for other algorithms. /// /// After the image is converted to grayscale, it is blurred with a Gaussian blur using /// two different sigmas, and then the images are subtracted from each other. This reduces /// the image to just sharp transitions in luminance, i.e. edges. Varying the sigma values /// changes how sharp the edges are^[citation needed]. /// /// Further reading: /// * https://en.wikipedia.org/wiki/Difference_of_Gaussians /// * http://homepages.inf.ed.ac.uk/rbf/HIPR2/log.htm /// (Difference of Gaussians is an approximation of a Laplacian of Gaussian filter) #[must_use] pub fn preproc_diff_gauss_sigmas(self, sigma_a: f32, sigma_b: f32) -> Self { Self { gauss_sigmas: Some([sigma_a, sigma_b]), ..self } } /// Change the bit order of the resulting hash. /// /// After the image has been turned into a series of bits using the [`hash_alg`](#method.hash_alg) /// this series of bits has to be turned into a hash. There are two major ways this can be done. /// This library defaults to `BitOrder::LsbFirst`, meaning the first bit of the hash algo's output /// forms the least significant bit of the first byte of the hash. This means a hash alog output of /// 1011 0100 results in a hash of 0010 1101 (or 0x2E). For compatability with hashes created by /// other libraries there is the option to instead use `BitOrder::MsbFirst`, which would creat the /// hash 1011 0100 (0xB4) #[must_use] pub fn bit_order(self, bit_order: BitOrder) -> Self { Self { bit_order, ..self } } /// Create a [`Hasher`](struct.Hasher.html) from this config which can be used to hash images. /// /// ### Panics /// If the chosen hash size (`width x height`, rounded for the algorithm if necessary) /// is too large for the chosen container type (`B::max_bits()`). pub fn to_hasher(&self) -> Hasher { let Self { hash_alg, width, height, gauss_sigmas, resize_filter, dct, bit_order, .. } = *self; let (width, height) = hash_alg.round_hash_size(width, height); assert!( (width * height) as usize <= B::max_bits(), "hash size too large for container: {width} x {height}", ); // Blockhash doesn't resize the image so don't waste time calculating coefficients let dct_coeffs = if dct && hash_alg != HashAlg::Blockhash { // calculate the coefficients based on the resize dimensions let (dct_width, dct_height) = hash_alg.resize_dimensions(width, height); Some(DctCtxt::new(dct_width, dct_height)) } else { None }; Hasher { ctxt: HashCtxt { gauss_sigmas, dct_ctxt: dct_coeffs, width, height, resize_filter, bit_order, }, hash_alg, bytes_type: PhantomData, } } } // cannot be derived because of `FilterType` impl fmt::Debug for HasherConfig { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("HasherConfig") .field("width", &self.width) .field("height", &self.height) .field("hash_alg", &self.hash_alg) .field("resize_filter", &debug_filter_type(&self.resize_filter)) .field("gauss_sigmas", &self.gauss_sigmas) .field("use_dct", &self.dct) .field("bit_order", &self.bit_order) .finish() } } /// Generates hashes for images. /// /// Constructed via [`HasherConfig::to_hasher()`](struct.HasherConfig#method.to_hasher). pub struct Hasher> { ctxt: HashCtxt, hash_alg: HashAlg, bytes_type: PhantomData, } impl Hasher where B: HashBytes, { /// Calculate a hash for the given image with the configured options. pub fn hash_image(&self, img: &I) -> ImageHash { let hash = self.hash_alg.hash_image(&self.ctxt, img); ImageHash { hash, __backcompat: (), } } } enum CowImage<'a, I: Image> { Borrowed(&'a I), Owned(I::Buf), } impl<'a, I: Image> CowImage<'a, I> { fn to_grayscale(&self) -> Cow { match *self { CowImage::Borrowed(img) => img.to_grayscale(), CowImage::Owned(ref img) => img.to_grayscale(), } } } enum HashVals { Floats(Vec), Bytes(Vec), } // TODO: implement `Debug`, needs adaptor for `FilterType` struct HashCtxt { gauss_sigmas: Option<[f32; 2]>, dct_ctxt: Option, resize_filter: FilterType, bit_order: BitOrder, width: u32, height: u32, } impl HashCtxt { /// If Difference of Gaussians preprocessing is configured, produce a new image with it applied. fn gauss_preproc<'a, I: Image>(&self, image: &'a I) -> CowImage<'a, I> { if let Some([sigma_a, sigma_b]) = self.gauss_sigmas { let mut blur_a = image.blur(sigma_a); let blur_b = image.blur(sigma_b); blur_a.diff_inplace(&blur_b); CowImage::Owned(blur_a) } else { CowImage::Borrowed(image) } } /// If DCT preprocessing is configured, produce a vector of floats, otherwise a vector of bytes. fn calc_hash_vals(&self, img: &GrayImage, width: u32, height: u32) -> HashVals { if let Some(ref dct_ctxt) = self.dct_ctxt { let img = imageops::resize(img, dct_ctxt.width(), dct_ctxt.height(), self.resize_filter); let img_vals = img.into_vec(); let input_len = img_vals.len() + dct_ctxt.required_scratch(); let mut vals_with_scratch = Vec::with_capacity(input_len); // put the image values in [..width * height] and provide scratch space vals_with_scratch.extend(img_vals.into_iter().map(|x| x as f32)); // TODO: compare with `.set_len()` vals_with_scratch.resize(input_len, 0.); let hash_vals = dct_ctxt.dct_2d(vals_with_scratch); HashVals::Floats(dct_ctxt.crop_2d(hash_vals)) } else { let img = imageops::resize(img, width, height, self.resize_filter); HashVals::Bytes(img.into_vec()) } } } /// A struct representing an image processed by a perceptual hash. /// For efficiency, does not retain a copy of the image data after hashing. /// /// Get an instance with `ImageHash::hash()`. #[derive(PartialEq, Eq, Hash, Debug, Clone)] pub struct ImageHash> { hash: B, __backcompat: (), } /// Error that can happen constructing a `ImageHash` from bytes. #[derive(Debug, PartialEq, Eq)] pub enum InvalidBytesError { /// Byte slice passed to `from_bytes` was the wrong length. BytesWrongLength { /// Number of bytes the `ImageHash` type expected. expected: usize, /// Number of bytes found when parsing the hash bytes. found: usize, }, /// String passed was not valid base64. Base64(base64::DecodeError), } impl ImageHash { /// Get the bytes of this hash. pub fn as_bytes(&self) -> &[u8] { self.hash.as_slice() } /// Create an `ImageHash` instance from the given bytes. /// /// ## Errors: /// Returns a `InvalidBytesError::BytesWrongLength` error if the slice passed can't fit in `B`. pub fn from_bytes(bytes: &[u8]) -> Result, InvalidBytesError> { if bytes.len() * 8 > B::max_bits() { return Err(InvalidBytesError::BytesWrongLength { expected: B::max_bits() / 8, found: bytes.len(), }); } Ok(ImageHash { hash: B::from_iter(bytes.iter().copied()), __backcompat: (), }) } /// Calculate the Hamming distance between this and `other`. /// /// Equivalent to counting the 1-bits of the XOR of the two hashes. /// /// Essential to determining the perceived difference between `self` and `other`. /// /// ### Note /// This return value is meaningless if these two hashes are from different hash sizes or /// algorithms. pub fn dist(&self, other: &Self) -> u32 { BitSet::hamming(&self.hash, &other.hash) } /// Create an `ImageHash` instance from the given Base64-encoded string. /// /// ## Errors: /// Returns `InvalidBytesError::Base64(DecodeError::InvalidLength)` if the string wasn't valid base64. /// Otherwise returns the same errors as `from_bytes`. pub fn from_base64(encoded_hash: &str) -> Result, InvalidBytesError> { let bytes = base64::engine::general_purpose::STANDARD_NO_PAD .decode(encoded_hash) .map_err(InvalidBytesError::Base64)?; Self::from_bytes(&bytes) } /// Get a Base64 string representing the bits of this hash. /// /// Mostly for printing convenience. pub fn to_base64(&self) -> String { base64::engine::general_purpose::STANDARD_NO_PAD.encode(self.hash.as_slice()) } /// Unwraps this `ImageHash` into its inner bytes. /// This is useful if you want to move ownership of the bytes to a new struct. pub fn into_inner(self) -> B { self.hash } } /// Provide Serde a typedef for `image::FilterType`: https://serde.rs/remote-derive.html /// This is automatically checked, if Serde complains then double-check with the original definition #[derive(Serialize, Deserialize)] #[serde(remote = "FilterType")] enum SerdeFilterType { Nearest, Triangle, CatmullRom, Gaussian, Lanczos3, } fn debug_filter_type(ft: &FilterType) -> &'static str { use FilterType::*; match *ft { Triangle => "Triangle", Nearest => "Nearest", CatmullRom => "CatmullRom", Lanczos3 => "Lanczos3", Gaussian => "Gaussian", } } #[cfg(test)] mod test { use image::{ImageBuffer, Rgba}; use rand::{rngs::SmallRng, RngCore, SeedableRng}; use super::{HashAlg, HasherConfig, ImageHash}; type RgbaBuf = ImageBuffer, Vec>; fn gen_test_img(width: u32, height: u32) -> RgbaBuf { let len = (width * height * 4) as usize; let mut buf = Vec::with_capacity(len); unsafe { buf.set_len(len); } // We immediately fill the buffer. let mut rng = SmallRng::seed_from_u64(0xc0ffee); rng.fill_bytes(&mut buf); ImageBuffer::from_raw(width, height, buf).unwrap() } macro_rules! test_hash_equality { ($fnname:ident, $size:expr, $type:ident, $preproc_dct:expr) => { #[test] fn $fnname() { // square, powers of two test_hash_equality!(1024, 1024, $size, $type, $preproc_dct); // rectangular, powers of two test_hash_equality!(512, 256, $size, $type, $preproc_dct); // odd size, square test_hash_equality!(967, 967, $size, $type, $preproc_dct); // odd size, rectangular test_hash_equality!(967, 1023, $size, $type, $preproc_dct); } }; ($width:expr, $height:expr, $size:expr, $type:ident, $preproc_dct:expr) => {{ let test_img = gen_test_img($width, $height); let mut cfg = HasherConfig::new() .hash_alg(HashAlg::$type) .hash_size($size, $size); if $preproc_dct { if HashAlg::$type != HashAlg::Blockhash { cfg = cfg.preproc_dct(); } else { cfg = cfg.preproc_diff_gauss(); } } let hasher = cfg.to_hasher(); let hash1 = hasher.hash_image(&test_img); let hash2 = hasher.hash_image(&test_img); assert_eq!(hash1, hash2); }}; } macro_rules! test_hash_type { ($type:ident, $modname:ident) => { mod $modname { use super::*; test_hash_equality!(hash_eq_8, 8, $type, false); test_hash_equality!(hash_eq_16, 16, $type, false); test_hash_equality!(hash_eq_32, 32, $type, false); test_hash_equality!(hash_eq_8_dct, 8, $type, true); test_hash_equality!(hash_eq_16_dct, 16, $type, true); test_hash_equality!(hash_eq_32_dct, 32, $type, true); } }; } test_hash_type!(Mean, mean); test_hash_type!(Median, median); test_hash_type!(Blockhash, blockhash); test_hash_type!(Gradient, gradient); test_hash_type!(DoubleGradient, dbl_gradient); test_hash_type!(VertGradient, vert_gradient); #[test] fn size() { let test_img = gen_test_img(1024, 1024); let hasher = HasherConfig::new() .hash_alg(HashAlg::Mean) .hash_size(32, 32) .to_hasher(); let hash = hasher.hash_image(&test_img); assert_eq!(32 * 32 / 8, hash.as_bytes().len()); } #[test] fn base64_encoding_decoding() { let test_img = gen_test_img(1024, 1024); let hasher = HasherConfig::new() .hash_alg(HashAlg::Mean) .hash_size(32, 32) .to_hasher(); let hash1 = hasher.hash_image(&test_img); let base64_string = hash1.to_base64(); let decoded_result = ImageHash::from_base64(&*base64_string); assert_eq!(decoded_result.unwrap(), hash1); } } image_hasher-2.0.0/src/traits.rs000064400000000000000000000205051046102023000147040ustar 00000000000000use std::borrow::Cow; use std::ops; use image::{imageops, DynamicImage, GenericImageView, GrayImage, ImageBuffer, Pixel}; use crate::BitOrder; /// Interface for types used for storing hash data. /// /// This is implemented for `Vec`, `Box<[u8]>` and arrays that are multiples/combinations of /// useful x86 bytewise SIMD register widths (64, 128, 256, 512 bits). /// /// Please feel free to open a pull request [on Github](https://github.com/qarmin/img_hash) /// if you need this implemented for a different array size. pub trait HashBytes { /// Construct this type from an iterator of bytes. /// /// If this type has a finite capacity (i.e. an array) then it can ignore extra data /// (the hash API will not create a hash larger than this type can contain). Unused capacity /// **must** be zeroed. fn from_iter>(iter: I) -> Self where Self: Sized; /// Return the maximum capacity of this type, in bits. /// /// If this type has an arbitrary/theoretically infinite capacity, return `usize::max_value()`. fn max_bits() -> usize; /// Get the hash bytes as a slice. fn as_slice(&self) -> &[u8]; } impl HashBytes for Box<[u8]> { fn from_iter>(iter: I) -> Self { // stable in 1.32, effectively the same thing // iter.collect() iter.collect::>().into_boxed_slice() } fn max_bits() -> usize { usize::MAX } fn as_slice(&self) -> &[u8] { self } } impl HashBytes for Vec { fn from_iter>(iter: I) -> Self { iter.collect() } fn max_bits() -> usize { usize::MAX } fn as_slice(&self) -> &[u8] { self } } macro_rules! hash_bytes_array { ($($n:expr),*) => {$( impl HashBytes for [u8; $n] { fn from_iter>(mut iter: I) -> Self { // optimizer should eliminate this zeroing let mut out = [0; $n]; for (src, dest) in iter.by_ref().zip(out.as_mut()) { *dest = src; } out } fn max_bits() -> usize { $n * 8 } fn as_slice(&self) -> &[u8] { self } } )*} } hash_bytes_array!(8, 16, 24, 32, 40, 48, 56, 64); struct BoolsToBytes { iter: I, bit_order: BitOrder, } impl Iterator for BoolsToBytes where I: Iterator, { type Item = u8; fn next(&mut self) -> Option<::Item> { match self.bit_order { BitOrder::LsbFirst => { // starts at the LSB and works up self.iter .by_ref() .take(8) .enumerate() .fold(None, |accum, (n, val)| { accum.or(Some(0)).map(|accum| accum | ((val as u8) << n)) }) } BitOrder::MsbFirst => { // starts at the MSB and works down self.iter .by_ref() .take(8) .enumerate() .fold(None, |accum, (n, val)| { accum .or(Some(0)) .map(|accum| accum | ((val as u8) << (7 - n))) }) } } } fn size_hint(&self) -> (usize, Option) { let (lower, upper) = self.iter.size_hint(); ( lower / 8, // if the upper bound doesn't evenly divide by `8` then we will yield an extra item upper.map(|upper| { if upper % 8 == 0 { upper / 8 } else { upper / 8 + 1 } }), ) } } pub(crate) trait BitSet: HashBytes { fn from_bools>(iter: I, bit_order: BitOrder) -> Self where Self: Sized, { Self::from_iter(BoolsToBytes { iter, bit_order }) } fn hamming(&self, other: &Self) -> u32 { self.as_slice() .iter() .zip(other.as_slice()) .map(|(l, r)| (l ^ r).count_ones()) .sum() } } impl BitSet for T {} /// Shorthand trait bound for APIs in this crate. /// /// Currently only implemented for the types provided by `image` with 8-bit channels. pub trait Image: GenericImageView + 'static { /// The equivalent `ImageBuffer` type for this container. type Buf: Image + DiffImage; /// Grayscale the image, reducing to 8 bit depth and dropping the alpha channel. fn to_grayscale(&self) -> Cow; /// Blur the image with the given `Gaussian` sigma. fn blur(&self, sigma: f32) -> Self::Buf; /// Iterate over the image, passing each pixel's coordinates and values in `u8` to the closure. /// /// The iteration order is unspecified but each pixel **must** be visited exactly _once_. /// /// If the pixel's channels are wider than 8 bits then the values should be scaled to /// `[0, 255]`, not truncated. /// /// ### Note /// If the pixel data length is 2 or 4, the last index is assumed to be the alpha channel. /// A pixel data length outside of `[1, 4]` will cause a panic. fn foreach_pixel8(&self, foreach: F) where F: FnMut(u32, u32, &[u8]); } /// Image types that can be diffed. pub trait DiffImage { /// Subtract the pixel values of `other` from `self` in-place. fn diff_inplace(&mut self, other: &Self); } #[cfg(not(feature = "nightly"))] impl Image for ImageBuffer where P: Pixel, C: ops::Deref, { type Buf = ImageBuffer>; fn to_grayscale(&self) -> Cow { Cow::Owned(imageops::grayscale(self)) } fn blur(&self, sigma: f32) -> Self::Buf { imageops::blur(self, sigma) } fn foreach_pixel8(&self, mut foreach: F) where F: FnMut(u32, u32, &[u8]), { self.enumerate_pixels() .for_each(|(x, y, px)| foreach(x, y, px.channels())); } } #[cfg(feature = "nightly")] impl Image for ImageBuffer where P: Pixel, C: ops::Deref, { type Buf = ImageBuffer>; default fn to_grayscale(&self) -> Cow { Cow::Owned(imageops::grayscale(self)) } default fn blur(&self, sigma: f32) -> Self::Buf { imageops::blur(self, sigma) } default fn foreach_pixel8(&self, mut foreach: F) where F: FnMut(u32, u32, &[u8]), { self.enumerate_pixels() .for_each(|(x, y, px)| foreach(x, y, px.channels())) } } impl DiffImage for ImageBuffer> where P: Pixel, { fn diff_inplace(&mut self, other: &Self) { self.iter_mut().zip(other.iter()).for_each(|(l, r)| { *l = l.wrapping_sub(*r); }); } } impl Image for DynamicImage { type Buf = image::RgbaImage; fn to_grayscale(&self) -> Cow { self.as_luma8() .map_or_else(|| Cow::Owned(self.to_luma8()), Cow::Borrowed) } fn blur(&self, sigma: f32) -> Self::Buf { imageops::blur(self, sigma) } fn foreach_pixel8(&self, mut foreach: F) where F: FnMut(u32, u32, &[u8]), { self.pixels() .for_each(|(x, y, px)| foreach(x, y, px.channels())); } } #[cfg(feature = "nightly")] impl Image for GrayImage { // type Buf = GrayImage; // Avoids copying fn to_grayscale(&self) -> Cow { Cow::Borrowed(self) } } #[test] fn test_bools_to_bytes() { let bools = (0..16).map(|x| x & 1 == 0); let bytes = Vec::from_bools(bools.clone(), BitOrder::LsbFirst); assert_eq!(*bytes, [0b01010101; 2]); let bools_to_bytes = BoolsToBytes { iter: bools, bit_order: BitOrder::LsbFirst, }; assert_eq!(bools_to_bytes.size_hint(), (2, Some(2))); } #[test] fn test_bit_order() { let bools = (0..16).map(|x| x % 3 == 0); let bytes_lsb = Vec::from_bools(bools.clone(), BitOrder::LsbFirst); assert_eq!(*bytes_lsb, [0b01001001, 0b10010010]); let bytes_msb = Vec::from_bools(bools.clone(), BitOrder::MsbFirst); assert_eq!(*bytes_msb, [0b10010010, 0b01001001]); }