czkawka_core-8.0.0/.cargo_vcs_info.json
{
  "git": {
    "sha1": "db164d3698198dd46653b1c3bb0384f8a9e38fab"
  },
  "path_in_vcs": "czkawka_core"
}

czkawka_core-8.0.0/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
rust-version = "1.79.0"
name = "czkawka_core"
version = "8.0.0"
authors = ["Rafał Mikrut "]
build = "build.rs"
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Core of Czkawka app"
homepage = "https://github.com/qarmin/czkawka"
readme = "README.md"
license = "MIT"
repository = "https://github.com/qarmin/czkawka"

[lib]
name = "czkawka_core"
path = "src/lib.rs"

[dependencies.anyhow]
version = "1.0.89"

[dependencies.audio_checker]
version = "0.1"

[dependencies.bincode]
version = "1.3"

[dependencies.bitflags]
version = "2.6"

[dependencies.bk-tree]
version = "0.5"

[dependencies.blake3]
version = "1.5"

[dependencies.crc32fast]
version = "1.4"

[dependencies.crossbeam-channel]
version = "0.5"

[dependencies.directories-next]
version = "2.0"

[dependencies.ffmpeg_cmdline_utils]
version = "0.1"

[dependencies.fun_time]
version = "0.3"
features = ["log"]

[dependencies.hamming]
version = "0.1"

[dependencies.handsome_logger]
version = "0.8"

[dependencies.humansize]
version = "2.1"

[dependencies.i18n-embed]
version = "0.15"
features = [
    "fluent-system",
    "desktop-requester",
]

[dependencies.i18n-embed-fl]
version = "0.9"

[dependencies.image]
version = "0.25"
features = [
    "bmp",
    "dds",
    "exr",
    "ff",
    "gif",
    "hdr",
    "ico",
    "jpeg",
    "png",
    "pnm",
    "qoi",
    "tga",
    "tiff",
    "webp",
]
default-features = false

[dependencies.image_hasher]
version = "2.0"

[dependencies.imagepipe]
version = "0.5"

[dependencies.infer]
version = "0.16"

[dependencies.itertools]
version = "0.13"

[dependencies.jxl-oxide]
version = "0.9.0"
default-features = false

[dependencies.libheif-rs]
version = "=0.18.0"
optional = true

[dependencies.libheif-sys]
version = "=1.14.2"
optional = true

[dependencies.libraw-rs]
version = "0.0.4"
optional = true

[dependencies.lofty]
version = "0.21"

[dependencies.log]
version = "0.4.22"

[dependencies.mime_guess]
version = "2.0"

[dependencies.once_cell]
version = "1.20"

[dependencies.os_info]
version = "3"
default-features = false

[dependencies.pdf]
version = "0.9"

[dependencies.rawloader]
version = "0.37"

[dependencies.rayon]
version = "1.10"

[dependencies.rust-embed]
version = "8.5"
features = ["debug-embed"]

[dependencies.rusty-chromaprint]
version = "0.2"

[dependencies.serde]
version = "1.0"

[dependencies.serde_json]
version = "1.0"

[dependencies.state]
version = "0.6"

[dependencies.symphonia]
version = "0.5"
features = ["all"]

[dependencies.tempfile]
version = "3.13"

[dependencies.trash]
version = "5.1"

[dependencies.vid_dup_finder_lib]
version = "0.1"

[dependencies.xxhash-rust]
version = "0.8"
features = ["xxh3"]

[dependencies.zip]
version = "2.2"
features = [
    "aes-crypto",
    "bzip2",
    "deflate",
    "time",
]
default-features = false

[build-dependencies.rustc_version]
version = "0.4"

[features]
default = []
heif = [
    "dep:libheif-rs",
    "dep:libheif-sys",
]
libavif = [
    "image/avif-native",
    "image/avif",
]
libraw = ["dep:libraw-rs"]

[target."cfg(windows)".dependencies.file-id]
version = "=0.2.1"

czkawka_core-8.0.0/Cargo.toml.orig
[package]
name = "czkawka_core"
version = "8.0.0"
authors = ["Rafał Mikrut "]
edition = "2021"
rust-version = "1.79.0"
description = "Core of Czkawka app"
license = "MIT"
homepage = "https://github.com/qarmin/czkawka"
repository = "https://github.com/qarmin/czkawka"
build = "build.rs"

[dependencies]
humansize = "2.1"
rayon = "1.10"
crossbeam-channel = "0.5"

# For saving/loading config files to specific directories
directories-next = "2.0"

# Needed by similar images
image_hasher = "2.0"
bk-tree = "0.5"
image = { version = "0.25", default-features = false, features = ["bmp", "dds", "exr", "ff", "gif", "hdr", "ico", "jpeg", "png", "pnm", "qoi", "tga", "tiff", "webp"] }
hamming = "0.1"

# Needed by same music
bitflags = "2.6"
lofty = "0.21"

# Needed by broken files
zip = { version = "2.2", features = ["aes-crypto", "bzip2", "deflate", "time"], default-features = false }
audio_checker = "0.1"
pdf = "0.9"

# Needed by audio similarity feature
rusty-chromaprint = "0.2"
symphonia = { version = "0.5", features = ["all"] }

# Hashes for duplicate files
blake3 = "1.5"
crc32fast = "1.4"
xxhash-rust = { version = "0.8", features = ["xxh3"] }

tempfile = "3.13"

# Video Duplicates
vid_dup_finder_lib = "0.1"
ffmpeg_cmdline_utils = "0.1"

# Saving/Loading Cache
serde = "1.0"
bincode = "1.3"
serde_json = "1.0"

# Language
i18n-embed = { version = "0.15", features = ["fluent-system", "desktop-requester"] }
i18n-embed-fl = "0.9"
rust-embed = { version = "8.5", features = ["debug-embed"] }
once_cell = "1.20"

# Raw image files
#rawler = "0.6"
#imagepipe = { path = "/home/rafal/test/imagepipe" }
rawloader = "0.37"
imagepipe = "0.5"
libraw-rs = { version = "0.0.4", optional = true }

jxl-oxide = { version = "0.9.0", default-features = false }

# Checking for invalid extensions
mime_guess = "2.0"
infer = "0.16"

# Heif/Heic
libheif-rs = { version = "=0.18.0", optional = true } # Do not upgrade now, since Ubuntu 22.04 does not work with a newer version
libheif-sys = { version = "=1.14.2", optional = true } # 1.14.3 breaks compilation on Ubuntu 22.04, so pin it to this version

anyhow = { version = "1.0.89" }
state = "0.6"

trash = "5.1"
os_info = { version = "3", default-features = false }
log = "0.4.22"
handsome_logger = "0.8"
fun_time = { version = "0.3", features = ["log"] }
itertools = "0.13"

# Don't update anymore! This crate has a bug. I've submitted a patch upstream, but the change is breaking. The current code relies on the bug to work correctly!
# Warning by CalunVier 2024.7.15
[target.'cfg(windows)'.dependencies]
file-id = "=0.2.1"

[build-dependencies]
rustc_version = "0.4"

[features]
default = []
heif = ["dep:libheif-rs", "dep:libheif-sys"]
libraw = ["dep:libraw-rs"]
libavif = ["image/avif-native", "image/avif"]

czkawka_core-8.0.0/LICENSE
MIT License

Copyright (c) 2020-2024 Rafał Mikrut

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

czkawka_core-8.0.0/README.md
# Czkawka Core

Core of Czkawka GUI/CLI and Krokiet projects.

czkawka_core-8.0.0/build.rs
fn main() {
    let rust_version = match rustc_version::version_meta() {
        Ok(meta) => {
            let rust_v = meta.semver.to_string();
            let rust_date = meta.commit_date.unwrap_or_default();
            format!("{rust_v} ({rust_date})")
        }
        Err(_) => "".to_string(),
    };
    println!("cargo:rustc-env=RUST_VERSION_INTERNAL={rust_version}");

    // Find out if the app is built with cranelift
    if let Ok(codegen) = std::env::var("CARGO_PROFILE_RELEASE_CODEGEN_UNITS") {
        if codegen == "1" {
            println!("cargo:rustc-env=USING_CRANELIFT=1");
        }
    }
}
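The build script above exposes two compile-time variables to the crate: RUST_VERSION_INTERNAL (always emitted, possibly empty) and USING_CRANELIFT (emitted only when release codegen-units is 1). A minimal sketch of how such cargo:rustc-env values can be read; the compiler_info function is a hypothetical illustration, not part of czkawka_core's API:

// Illustrative sketch only; `compiler_info` is a hypothetical helper.
fn compiler_info() -> String {
    // `env!` fails to compile if build.rs did not set the variable;
    // RUST_VERSION_INTERNAL is always emitted (possibly as an empty string).
    let rustc = env!("RUST_VERSION_INTERNAL");
    // USING_CRANELIFT is emitted only for codegen-units = 1 release builds,
    // so it must be probed with `option_env!`.
    let cranelift = option_env!("USING_CRANELIFT").is_some();
    format!("rustc {rustc}, cranelift: {cranelift}")
}

fn main() {
    println!("{}", compiler_info());
}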

czkawka_core-8.0.0/data/com.github.qarmin.czkawka.desktop
[Desktop Entry]
Type=Application
Terminal=false
Exec=czkawka_gui
Name=Czkawka
Name[it]=Singhiozzo
Comment=Multi functional app to clean OS which allow to find duplicates, empty folders, similar files etc.
Comment[it]=Programma multifunzionale per pulire il sistema, che permette di trovare file duplicati, cartelle vuote, file simili, ecc...
Comment[zh_CN]=可用于清理文件副本、空文件夹、相似文件等的系统清理工具
Comment[zh_TW]=可用於清理重複檔案、空資料夾、相似檔案等的系統清理工具
Icon=com.github.qarmin.czkawka
Categories=System;FileTools
Keywords=Hiccup;duplicate;same;similar;cleaner
StartupWMClass=czkawka_gui
TryExec=czkawka_gui

czkawka_core-8.0.0/data/com.github.qarmin.czkawka.metainfo.xml
<?xml version="1.0" encoding="UTF-8"?>
<component type="desktop-application">
  <id>com.github.qarmin.czkawka</id>
  <name>Czkawka</name>
  <summary>Multi functional app to find duplicates, empty folders, similar images, broken files etc.</summary>
  <metadata_license>CC0-1.0</metadata_license>
  <project_license>MIT</project_license>
  <description>
    <p>Czkawka is simple, fast and easy to use app to remove unnecessary files from your computer.</p>
  </description>
  <launchable type="desktop-id">com.github.qarmin.czkawka.desktop</launchable>
  <screenshots>
    <screenshot type="default">
      <image>https://user-images.githubusercontent.com/41945903/147875238-7f82fa27-c6dd-47e7-87ed-e253fb2cbc3e.png</image>
    </screenshot>
    <screenshot>
      <image>https://user-images.githubusercontent.com/41945903/147875239-bcf9776c-885d-45ac-ba82-5a426d8e1647.png</image>
    </screenshot>
    <screenshot>
      <image>https://user-images.githubusercontent.com/41945903/147875243-e654e683-37f7-46fa-8321-119a4c5775e7.png</image>
    </screenshot>
  </screenshots>
  <developer_name>Rafał Mikrut</developer_name>
  <developer>
    <name>Rafał Mikrut</name>
  </developer>
  <url type="homepage">https://github.com/qarmin/czkawka</url>
  <url type="bugtracker">https://github.com/qarmin/czkawka/issues</url>
  <url type="donation">https://github.com/sponsors/qarmin</url>
  <url type="translate">https://crowdin.com/project/czkawka</url>
</component>
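The icon and i18n assets below ship inside the crate and, per Cargo.toml's rust-embed dependency (with the debug-embed feature), can be compiled directly into the binary. A minimal sketch of embedding and listing a folder of assets; the struct name Localizations and the folder attribute are placeholders assumed here, not taken from czkawka_core's source:

use rust_embed::RustEmbed;

// Hypothetical embed of the crate's translation folder; czkawka_core's
// actual struct name and folder path may differ.
#[derive(RustEmbed)]
#[folder = "i18n/"]
struct Localizations;

fn main() {
    // Iterate the embedded file paths, e.g. "en/czkawka_core.ftl".
    for path in Localizations::iter() {
        let file = Localizations::get(&path).expect("embedded file exists");
        println!("{path}: {} bytes", file.data.len());
    }
}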
czkawka_core-8.0.0/data/icons/com.github.qarmin.czkawka-symbolic.svg
czkawka_core-8.0.0/data/icons/com.github.qarmin.czkawka.Devel.svg
czkawka_core-8.0.0/data/icons/com.github.qarmin.czkawka.svg
[SVG markup for the three icon files above was stripped in extraction and is not recoverable]

czkawka_core-8.0.0/i18n/ar/czkawka_core.ftl
# Core
core_similarity_original = الأصل
core_similarity_very_high = عالية جدا
core_similarity_high = مرتفع
core_similarity_medium = متوسط
core_similarity_small = صغير
core_similarity_very_small = صغير جدا
core_similarity_minimal = الحد الأدنى
core_cannot_open_dir = لا يمكن فتح dir { $dir }، السبب { $reason }
core_cannot_read_entry_dir = لا يمكن قراءة الإدخال في dir { $dir }، السبب { $reason }
core_cannot_read_metadata_dir = لا يمكن قراءة البيانات الوصفية في dir { $dir }، السبب { $reason }
core_file_not_utf8_name = الملف { $name } ليس لديه اسم UTF-8 صالح (قد لا تظهر بعض الأحرف)
core_file_modified_before_epoch = يبدو أن الملف { $name } قد تم تعديله قبل يونكس Epoch
core_folder_modified_before_epoch = يبدو أن المجلد { $name } قد تم تعديله قبل يونكس Epoch
core_file_no_modification_date = غير قادر على الحصول على تاريخ التعديل من الملف { $name }، السبب { $reason }
core_folder_no_modification_date = غير قادر على الحصول على تاريخ التعديل من المجلد { $name }، السبب { $reason }
core_missing_no_chosen_included_directory = يجب توفير دليل واحد على الأقل
core_directory_wildcard_no_supported = الأدلة: البطاقات البرية في المسار غير مدعومة، تجاهل { $path }
core_directory_must_exists = الأدلة: يجب أن يكون مسار المجلد المتوفر موجودا، تجاهل { $path }
core_directory_must_be_directory = الأدلة: المسار المقدم يجب أن يشير إلى الدليل، تجاهل { $path }
core_included_directory_zero_valid_directories = خطأ في الدليل المضمن: لا يوجد حتى مسار واحد صحيح للإدراج المطلوب
core_excluded_directory_pointless_slash = الأدلة: استبعاد / لا معنى له، لأنه يعني أنه لن يتم مسح أي ملفات
core_directory_overlap = الأدلة: جميع الدلائل للبحث عن التداخل مع الدلائل المستبعدة
core_directory_unable_to_get_device_id = الأدلة: غير قادر على الحصول على معرف الجهاز من المجلد { $path }
core_ffmpeg_not_found = لا يمكن العثور على التثبيت الصحيح لFFmpeg
core_ffmpeg_not_found_windows = تأكد من أن ffmpeg.exe و ffprobe.exe متاحان في PATH أو يتم وضعهما مباشرة لنفس المجلد حيث التطبيق قابل للتنفيذ
core_ffmpeg_missing_in_snap = مقاطع فيديو مشابهة لا تعمل حاليا مع السعادة، إذا كنت تريد المساعدة في النظر - { $url }
core_saving_to_cache = تم الحفظ في الملف { $number } إدخالات ذاكرة التخزين المؤقت
core_loading_from_cache = تحميل من ذاكرة التخزين المؤقت { $number } إدخالات
czkawka_core-8.0.0/i18n/bg/czkawka_core.ftl
# Core
core_similarity_original = Оригинален
core_similarity_very_high = Много висок
core_similarity_high = Висок
core_similarity_medium = Среден
core_similarity_small = Малък
core_similarity_very_small = Много малък
core_similarity_minimal = Минимален
core_cannot_open_dir = Не може да се отвори папка { $dir }, причината е { $reason }
core_cannot_read_entry_dir = Не може да се прочете папка { $dir }, причината е { $reason }
core_cannot_read_metadata_dir = Не могат да се прочетат мета-данните в папка { $dir }, причината е { $reason }
core_file_not_utf8_name = Файлът { $name } няма валидно UTF-8 име (някои от символите може да не се визуализират)
core_file_modified_before_epoch = Файлът { $name } изглежда да е променен преди Unix Epoc
core_folder_modified_before_epoch = Папка { $name } изглежда да е променена преди Unix Epoc
core_file_no_modification_date = Невъзможно е да се получи променената дата от файл { $name }, причината е { $reason }
core_folder_no_modification_date = Невъзможно е да се извлече променената дата от файл { $name }, причината е { $reason }
core_missing_no_chosen_included_directory = Трябва да се предостави поне една директория
core_directory_wildcard_no_supported = Директории: Не се поддържат заместващи знаци в пътя, игнорирайки { $path }
core_directory_must_exists = Директории: Предоставеният път до папката трябва да съществува, като се игнорира { $path }
core_directory_must_be_directory = Директории: Предоставеният път трябва да сочи към директорията, като не се взема под внимание { $path }
core_included_directory_zero_valid_directories = Включена директория ГРЕШКА: Не е намерен дори един правилен път към включената директория, която се изисква
core_excluded_directory_pointless_slash = Директории: Изключването на / е безсмислено, защото означава, че няма да бъдат сканирани никакви файлове
core_directory_overlap = Директории: Всички директории за търсене се припокриват с изключените директории
core_directory_unable_to_get_device_id = Директории: Невъзможно е да се получи идентификатор на устройството от папка { $path }
core_ffmpeg_not_found = Не мога да намеря правилната инсталация на FFmpeg
core_ffmpeg_not_found_windows = Уверете се, че ffmpeg.exe и ffprobe.exe са налични в PATH или са поставени директно в същата папка, където е изпълнимото приложение
core_ffmpeg_missing_in_snap = Подобни видеоклипове не работят в момента със snap, ако искате помощ, погледнете - { $url }
core_saving_to_cache = Запазени във файл { $number } записи в кеша
core_loading_from_cache = Заредени от кеш { $number } вписвания

czkawka_core-8.0.0/i18n/cs/czkawka_core.ftl
# Core
core_similarity_original = Originál
core_similarity_very_high = Velmi vysoká
core_similarity_high = Vysoká
core_similarity_medium = Střední
core_similarity_small = Malá
core_similarity_very_small = Velmi malá
core_similarity_minimal = Minimální
core_cannot_open_dir = Nelze otevřít adresář { $dir }, důvod { $reason }
core_cannot_read_entry_dir = Nelze načíst záznam v adresáři { $dir }, důvod { $reason }
core_cannot_read_metadata_dir = Nelze načíst metadata v adresáři { $dir }, důvod { $reason }
core_file_not_utf8_name = Soubor { $name } nemá platný název UTF-8 (některé znaky nemusí být zobrazeny)
core_file_modified_before_epoch = Soubor { $name } se zdá být upraven před unixovým Epochem (1.1.1970)
core_folder_modified_before_epoch = Složka { $name } se zdá být upravena před unixovým Epochem (1.1.1970)
core_file_no_modification_date = Nelze získat datum úpravy ze souboru { $name }, důvod { $reason }
core_folder_no_modification_date = Nelze získat datum úpravy ze složky { $name }, důvod { $reason }
core_missing_no_chosen_included_directory = Musí být uveden alespoň jeden adresář
core_directory_wildcard_no_supported = Adresáře: Zástupné znaky v cestě nejsou podporovány, ignoruji { $path }
core_directory_must_exists = Adresáře: Poskytnutá cesta ke složce musí existovat, ignoruji { $path }
core_directory_must_be_directory = Adresáře: Poskytnutá cesta musí směřovat do adresáře, ignoruji { $path }
core_included_directory_zero_valid_directories = CHYBA zahrnutí adresáře: Nenalezena ani jedna správná cesta k zahrnutí, která je vyžadována
core_excluded_directory_pointless_slash = Adresáře: Vyloučení / je bezúčelné, protože to znamená, že žádné soubory nebudou naskenovány
core_directory_overlap = Adresáře: Všechny adresáře pro vyhledávání se překrývají s vyloučením adresářů
core_directory_unable_to_get_device_id = Adresáře: Nelze získat ID zařízení ze složky { $path }
core_ffmpeg_not_found = Nelze najít správnou instalaci FFmpeg
core_ffmpeg_not_found_windows = Ujistěte se, že ffmpeg.exe a ffprobe.exe jsou k dispozici v PATH nebo jsou umístěny přímo do stejné složky, kde lze spustit aplikaci
core_ffmpeg_missing_in_snap = Podobná videa v současné době nefungují se snapem, pokud chcete nápovědu sledovat - { $url }
core_saving_to_cache = Uloženo do souboru { $number } položek mezipaměti
core_loading_from_cache = Načteno z { $number } položek keše
czkawka_core-8.0.0/i18n/de/czkawka_core.ftl
# Core
core_similarity_original = Original
core_similarity_very_high = Sehr Hoch
core_similarity_high = Hoch
core_similarity_medium = Mittel
core_similarity_small = Klein
core_similarity_very_small = Sehr klein
core_similarity_minimal = Minimal
core_cannot_open_dir = Verzeichnis { $dir } kann nicht geöffnet werden, Grund { $reason }
core_cannot_read_entry_dir = Kann Eintrag in Verzeichnis { $dir } nicht lesen, Grund { $reason }
core_cannot_read_metadata_dir = Metadaten können in Verzeichnis { $dir } nicht gelesen werden, Grund { $reason }
core_file_not_utf8_name = Datei { $name } hat keinen gültigen UTF-8-Namen (einige Zeichen könnten nicht angezeigt werden)
core_file_modified_before_epoch = Datei { $name } scheint vor dieser Unix-Epoche geändert worden zu sein
core_folder_modified_before_epoch = Ordner { $name } scheint vor dieser Unix-Epoche geändert worden zu sein
core_file_no_modification_date = Konnte das Änderungsdatum von Datei { $name } nicht abrufen, Grund { $reason }
core_folder_no_modification_date = Konnte das Änderungsdatum aus dem Ordner { $name } nicht abrufen, Grund { $reason }
core_missing_no_chosen_included_directory = Mindestens ein Verzeichnis muss angegeben werden
core_directory_wildcard_no_supported = Verzeichnisse: Wildcards im Pfad werden nicht unterstützt, { $path } wird ignoriert
core_directory_must_exists = Verzeichnisse: Der angegebene Ordnerpfad muss existieren, { $path } wird ignoriert
core_directory_must_be_directory = Verzeichnisse: Der angegebene Pfad muss auf das Verzeichnis zeigen, { $path } wird ignoriert
core_included_directory_zero_valid_directories = Einbezogenes Verzeichnis-FEHLER: Kein korrekter Pfad gefunden, welcher einbezogen werden soll, was erforderlich ist
core_excluded_directory_pointless_slash = Verzeichnisse: / auszuschließen ist sinnlos, weil somit keine Dateien gescannt werden
core_directory_overlap = Verzeichnisse: Alle zu durchsuchende Verzeichnisse überlappen mit den ausgeschlossenen Verzeichnissen
core_directory_unable_to_get_device_id = Verzeichnisse: Geräte-ID kann nicht aus dem Ordner { $path } geholt werden
core_ffmpeg_not_found = Keine richtige Installation von FFmpeg gefunden
core_ffmpeg_not_found_windows = Stellen Sie sicher, dass ffmpeg.exe und ffprobe.exe in PATH verfügbar sind oder direkt in den gleichen Ordner gelegt werden, in dem die App ausführbar ist
core_ffmpeg_missing_in_snap = Ähnliche Videos funktionieren derzeit nicht mit Snap, wenn du Hilfe möchtest, besuche - { $url }
core_saving_to_cache = { $number } Cache-Einträge in der Datei gespeichert
core_loading_from_cache = { $number } Einträge aus dem Cache geladen

czkawka_core-8.0.0/i18n/el/czkawka_core.ftl
# Core
core_similarity_original = Αρχικό
core_similarity_very_high = Πολύ Υψηλή
core_similarity_high = Υψηλή
core_similarity_medium = Μεσαίο
core_similarity_small = Μικρό
core_similarity_very_small = Πολύ Μικρό
core_similarity_minimal = Ελάχιστα
core_cannot_open_dir = Αδυναμία ανοίγματος dir { $dir }, λόγος { $reason }
core_cannot_read_entry_dir = Αδυναμία ανάγνωσης καταχώρησης στον κατάλογο { $dir }, λόγος { $reason }
core_cannot_read_metadata_dir = Αδύνατη η ανάγνωση μεταδεδομένων στον κατάλογο { $dir }, λόγος { $reason }
core_file_not_utf8_name = Το αρχείο { $name } δεν έχει ένα έγκυρο όνομα UTF-8 (ορισμένοι χαρακτήρες μπορεί να μην εμφανίζονται)
core_file_modified_before_epoch = Το { $name } φαίνεται να τροποποιείται πριν το Unix Epoch
core_folder_modified_before_epoch = Ο φάκελος { $name } φαίνεται να τροποποιείται πριν το Unix Epoch
core_file_no_modification_date = Δεν είναι δυνατή η λήψη ημερομηνίας τροποποίησης από το αρχείο { $name }, λόγος { $reason }
core_folder_no_modification_date = Δεν είναι δυνατή η λήψη ημερομηνίας τροποποίησης από το φάκελο { $name }, λόγος { $reason }
core_missing_no_chosen_included_directory = Πρέπει να παρέχεται τουλάχιστον ένας κατάλογος
core_directory_wildcard_no_supported = Κατάλογοι: Δεν υποστηρίζονται μπαλαντέρ στο μονοπάτι, αγνοώντας { $path }
core_directory_must_exists = Κατάλογοι: Η παρεχόμενη διαδρομή φακέλου πρέπει να υπάρχει, αγνοώντας { $path }
core_directory_must_be_directory = Κατάλογοι: Παρέχεται διαδρομή πρέπει να δείχνει στον κατάλογο, αγνοώντας { $path }
core_included_directory_zero_valid_directories = Συμπεριλαμβανόμενος κατάλογος ΣΦΑΛΜΑ: Δεν βρέθηκε ούτε μια σωστή διαδρομή για να συμπεριληφθεί η οποία απαιτείται
core_excluded_directory_pointless_slash = Κατάλογοι: Εξαιρούνται / είναι άσκοπες, επειδή σημαίνει ότι δεν θα σαρωθούν αρχεία
core_directory_overlap = Κατάλογοι: Όλοι οι κατάλογοι για αναζήτηση επικαλύψεων με αποκλεισμένους καταλόγους
core_directory_unable_to_get_device_id = Κατάλογοι: Αδυναμία λήψης id συσκευής από το φάκελο { $path }
core_ffmpeg_not_found = Αδυναμία εύρεσης σωστής εγκατάστασης του FFmpeg
core_ffmpeg_not_found_windows = Να είστε βέβαιος ότι ffmpeg.exe και ffprobe.exe είναι διαθέσιμα σε PATH ή τίθενται απευθείας στον ίδιο φάκελο όπου είναι εκτελέσιμο app
core_ffmpeg_missing_in_snap = Παρόμοια βίντεο δεν λειτουργούν αυτή τη στιγμή με συμπληρωματικό πρόγραμμα, αν θέλετε να δείτε βοήθεια - { $url }
core_saving_to_cache = Αποθηκεύτηκε στο αρχείο καταχωρήσεις { $number } cache
core_loading_from_cache = Φορτώθηκε από καταχωρήσεις της λανθάνουσας μνήμης { $number }
czkawka_core-8.0.0/i18n/en/czkawka_core.ftl
# Core
core_similarity_original = Original
core_similarity_very_high = Very High
core_similarity_high = High
core_similarity_medium = Medium
core_similarity_small = Small
core_similarity_very_small = Very Small
core_similarity_minimal = Minimal
core_cannot_open_dir = Cannot open dir {$dir}, reason {$reason}
core_cannot_read_entry_dir = Cannot read entry in dir {$dir}, reason {$reason}
core_cannot_read_metadata_dir = Cannot read metadata in dir {$dir}, reason {$reason}
core_file_not_utf8_name = File {$name} does not have a valid UTF-8 name (some characters may not be shown)
core_file_modified_before_epoch = File {$name} seems to be modified before Unix Epoch
core_folder_modified_before_epoch = Folder {$name} seems to be modified before Unix Epoch
core_file_no_modification_date = Unable to get modification date from file {$name}, reason {$reason}
core_folder_no_modification_date = Unable to get modification date from folder {$name}, reason {$reason}
core_missing_no_chosen_included_directory = At least one directory must be provided
core_directory_wildcard_no_supported = Directories: Wildcards in path are not supported, ignoring { $path }
core_directory_must_exists = Directories: Provided folder path must exist, ignoring { $path }
core_directory_must_be_directory = Directories: Provided path must point at the directory, ignoring { $path }
core_included_directory_zero_valid_directories = Included Directory ERROR: Not found even one correct path to included which is required
core_excluded_directory_pointless_slash = Directories: Excluding / is pointless, because it means that no files will be scanned
core_directory_overlap = Directories: All directories to search overlaps with excluded directories
core_directory_unable_to_get_device_id = Directories: Unable to get device id from folder { $path }
core_ffmpeg_not_found = Cannot find proper installation of FFmpeg
core_ffmpeg_not_found_windows = Be sure that ffmpeg.exe and ffprobe.exe are available in PATH or are put directly to same folder where is app executable
core_ffmpeg_missing_in_snap = Similar Videos don't work currently with snap, if you want help look at - { $url }
core_saving_to_cache = Saved to file { $number } cache entries
core_loading_from_cache = Loaded from cache { $number } entries

czkawka_core-8.0.0/i18n/es/czkawka_core.ftl
# Core
core_similarity_original = Original
core_similarity_very_high = Muy alta
core_similarity_high = Alta
core_similarity_medium = Medio
core_similarity_small = Pequeño
core_similarity_very_small = Muy pequeño
core_similarity_minimal = Mínimo
core_cannot_open_dir = No se puede abrir el directorio { $dir }, razón { $reason }
core_cannot_read_entry_dir = No se puede leer la entrada en directorio { $dir }, razón { $reason }
core_cannot_read_metadata_dir = No se pueden leer metadatos en el directorio { $dir }, razón { $reason }
core_file_not_utf8_name = El archivo { $name } no tiene un nombre UTF-8 válido (algunos caracteres pueden no mostrarse)
core_file_modified_before_epoch = El archivo { $name } parece ser modificado antes de Unix Epoch
core_folder_modified_before_epoch = La carpeta { $name } parece ser modificada antes del Epoch Unix
core_file_no_modification_date = No se puede obtener la fecha de modificación del archivo { $name }, razón { $reason }
core_folder_no_modification_date = No se puede obtener la fecha de modificación de la carpeta { $name }, razón { $reason }
core_missing_no_chosen_included_directory = Debe proporcionarse al menos un directorio
core_directory_wildcard_no_supported = Directorios: Los comodines en la ruta no son compatibles, ignorando { $path }
core_directory_must_exists = Directorios: La ruta de la carpeta debe salir, ignorando { $path }
core_directory_must_be_directory = Directorios: La ruta proporcionada debe apuntar al directorio, ignorando { $path }
core_included_directory_zero_valid_directories = ERROR del directorio incluido: No se ha encontrado ni una ruta correcta a incluida que es necesaria
core_excluded_directory_pointless_slash = Directorios: Excluyendo / es inútil, ya que no se analizarán archivos
core_directory_overlap = Directorios: Todos los directorios para buscar superposiciones con directorios excluidos
core_directory_unable_to_get_device_id = Directorios: No se puede obtener el id del dispositivo de la carpeta { $path }
core_ffmpeg_not_found = No se puede encontrar la instalación correcta de FFmpeg
core_ffmpeg_not_found_windows = Asegúrese de que ffmpeg.exe y ffprobe.exe están disponibles en PATH o se colocan directamente en la misma carpeta donde es ejecutable la aplicación
core_ffmpeg_missing_in_snap = Los Videos Similares no funcionan actualmente con el snap, si quieres ayuda mira - { $url }
core_saving_to_cache = Guardado en el archivo { $number } entradas de caché
core_loading_from_cache = Cargado desde { $number } entradas de caché

czkawka_core-8.0.0/i18n/fr/czkawka_core.ftl
# Core
core_similarity_original = Originale
core_similarity_very_high = Très haute
core_similarity_high = Haute
core_similarity_medium = Moyenne
core_similarity_small = Basse
core_similarity_very_small = Très basse
core_similarity_minimal = Minimale
core_cannot_open_dir = Impossible d’ouvrir le répertoire { $dir }. Raison : { $reason }
core_cannot_read_entry_dir = Impossible de lire l'entrée dans le répertoire { $dir }. Raison : { $reason }
core_cannot_read_metadata_dir = Impossible de lire les métadonnées dans le répertoire { $dir }. Raison  : { $reason }
core_file_not_utf8_name = Le fichier { $name } n'a pas de nom UTF-8 valide (certains caractères peuvent ne pas être affichés)
core_file_modified_before_epoch = Le fichier { $name } semble avoir été modifié avant l'epoch Unix
core_folder_modified_before_epoch = Le dossier { $name } semble avoir été modifié avant l'epoch Unix
core_file_no_modification_date = Impossible d'obtenir la date de modification du fichier { $name }. Raison  : { $reason }
core_folder_no_modification_date = Impossible d'obtenir la date de modification du dossier { $name }. Raison : { $reason }
core_missing_no_chosen_included_directory = Au moins un répertoire doit être fourni
core_directory_wildcard_no_supported = Répertoires : les jokers dans le chemin ne sont pas pris en charge. { $path } est ignoré
core_directory_must_exists = Répertoires : le chemin du dossier fourni doit exister. { $path } est ignoré
core_directory_must_be_directory = Répertoires : le chemin fourni doit pointer vers le répertoire, { $path } est ignoré
core_included_directory_zero_valid_directories = ERREUR de répertoire inclus : aucun chemin correct n'a été trouvé alors qu'au moins un est nécessaire
core_excluded_directory_pointless_slash = Répertoires: exclure « / » est inutile car cela signifie qu'aucun fichier ne sera scanné
core_directory_overlap = Répertoires : tous les répertoires dans lesquels rechercher des chevauchements avec des répertoires exclus
core_directory_unable_to_get_device_id = Répertoires : impossible d'obtenir l'ID de l'appareil depuis le dossier { $path }
core_ffmpeg_not_found = Impossible de trouver une installation correcte de FFmpeg
core_ffmpeg_not_found_windows = Assurez-vous que ffmpeg.exe et ffprobe.exe sont disponibles dans PATH ou sont présents dans le même dossier que l'exécutable de l'application
core_ffmpeg_missing_in_snap = Les vidéos similaires ne fonctionnent pas actuellement avec snap. Si vous voulez de l'aide référez vous à - { $url }
core_saving_to_cache = { $number } entrées du cache enregistres dans un fichier
core_loading_from_cache = { $number } entrées chargées depuis le cache

czkawka_core-8.0.0/i18n/it/czkawka_core.ftl
# Core
core_similarity_original = Originali
core_similarity_very_high = Altissima
core_similarity_high = Alta
core_similarity_medium = Media
core_similarity_small = Piccola
core_similarity_very_small = Piccolissima
core_similarity_minimal = Minima
core_cannot_open_dir = Impossibile aprire cartella { $dir }, motivo { $reason }
core_cannot_read_entry_dir = Impossibile leggere elemento nella cartella { $dir }, ragione { $reason }
core_cannot_read_metadata_dir = Impossibile leggere metadati nella cartella { $dir }, ragione { $reason }
core_file_not_utf8_name = Il file { $name } non ha un nome UTF-8 valido (alcuni caratteri potrebbero non essere mostrati)
core_file_modified_before_epoch = Il file { $name } sembra essere stato modificato prima dell'Epoca Unix
core_folder_modified_before_epoch = La cartella { $name } sembra essere stato modificata prima dell'Epoca Unix
core_file_no_modification_date = Impossibile recuperare data di modifica dal file { $name }, ragione { $reason }
core_folder_no_modification_date = Impossibile recuperare data di modifica dalla cartella { $name }, ragione { $reason }
core_missing_no_chosen_included_directory = Almeno una directory deve essere fornita
core_directory_wildcard_no_supported = Cartelle: i caratteri jolly nel percorso non sono supportati, ignorando { $path }
core_directory_must_exists = Directories: Il percorso della cartella fornito deve uscire, ignorando { $path }
core_directory_must_be_directory = Directories: Il percorso fornito deve puntare alla directory, ignorando { $path }
core_included_directory_zero_valid_directories = ERRORE Directory incluso: Non trovato nemmeno un percorso corretto incluso che è richiesto
core_excluded_directory_pointless_slash = Cartelle: Escludere / è inutile, perché significa che nessun file verrà scansionato
core_directory_overlap = Directories: Tutte le directory per cercare sovrapposizioni con directory escluse
core_directory_unable_to_get_device_id = Directory: non è possibile ottenere l'id del dispositivo dalla cartella { $path }
core_ffmpeg_not_found = Impossibile trovare la corretta installazione di FFmpeg
core_ffmpeg_not_found_windows = Quando si utilizza Windows essere sicuri che ffmpeg.exe e ffprobe.exe sono disponibili in PATH o sono messi direttamente nella stessa cartella dove è eseguibile l'applicazione
core_ffmpeg_missing_in_snap = Video simili non funzionano attualmente con snap, se si desidera aiutare a guardare - { $url }
core_saving_to_cache = Salvato nel file { $number } voci cache
core_loading_from_cache = Caricato dalla cache { $number } voci
czkawka_core-8.0.0/i18n/ja/czkawka_core.ftl
# Core
core_similarity_original = 新規に作成
core_similarity_very_high = 非常に高い
core_similarity_high = 高い
core_similarity_medium = ミディアム
core_similarity_small = 小
core_similarity_very_small = 非常に小さい
core_similarity_minimal = 最小
core_cannot_open_dir = ディレクトリを開くことができません { $dir }、理由 { $reason }
core_cannot_read_entry_dir = Dir { $dir } でエントリを読み込めません、理由 { $reason }
core_cannot_read_metadata_dir = Dir { $dir } でメタデータを読み込めません、理由 { $reason }
core_file_not_utf8_name = ファイル { $name } に有効な UTF-8 名がありません (一部の文字は表示されない可能性があります)
core_file_modified_before_epoch = ファイル { $name } は Unix Epoch より前に変更されているようです
core_folder_modified_before_epoch = フォルダ { $name } は、Unix Epoch の前に変更されているようです
core_file_no_modification_date = ファイル { $name } から変更日を取得できません、理由 { $reason }
core_folder_no_modification_date = フォルダ { $name } から変更日を取得できません、理由 { $reason }
core_missing_no_chosen_included_directory = 少なくとも 1 つのディレクトリを指定する必要があります。
core_directory_wildcard_no_supported = ディレクトリ: パス内のワイルドカードはサポートされていません。 { $path } を無視してください
core_directory_must_exists = ディレクトリ: 指定されたフォルダパスは、 { $path } を無視して終了する必要があります
core_directory_must_be_directory = ディレクトリ: 指定されたパスはディレクトリを指す必要があります。 { $path } を無視します
core_included_directory_zero_valid_directories = 含まれるディレクトリエラー: 必須の正しいパスが1つも見つかりません
core_excluded_directory_pointless_slash = ディレクトリ: ファイルがスキャンされないことを意味するため、除外/無意味です
core_directory_overlap = ディレクトリ: 除外されたディレクトリとオーバーラップを検索するすべてのディレクトリ
core_directory_unable_to_get_device_id = ディレクトリ: フォルダ { $path } からデバイス ID を取得できません
core_ffmpeg_not_found = 適切なFFmpegのインストールが見つかりません
core_ffmpeg_not_found_windows = ffmpeg.exeとffprobe.exeがPATHで使用できることを確認するか、アプリ実行ファイルのある同じフォルダに直接配置してください。
core_ffmpeg_missing_in_snap = ヘルプを見たい場合は、現在同様のビデオはスナップでは動作しません - { $url }
core_saving_to_cache = { $number } 個のキャッシュエントリをファイルに保存しました
core_loading_from_cache = キャッシュから { $number } 個のエントリが読み込まれました

czkawka_core-8.0.0/i18n/ko/czkawka_core.ftl
# Core
core_similarity_original = 원본
core_similarity_very_high = 매우 높음
core_similarity_high = 높음
core_similarity_medium = 보통
core_similarity_small = 낮음
core_similarity_very_small = 매우 낮음
core_similarity_minimal = 최소
core_cannot_open_dir = { $dir } 디렉터리를 열 수 없습니다. 이유: { $reason }
core_cannot_read_entry_dir = { $dir } 디렉터리를 열 수 없습니다. 이유: { $reason }
core_cannot_read_metadata_dir = { $dir } 디렉터리의 메타데이터를 열 수 없습니다. 이유: { $reason }
core_file_not_utf8_name = 파일 이름 "{ $name }"은 유효한 UTF-8 이름이 아닙니다. 일부 글자가 보이지 않을 수 있습니다.
core_file_modified_before_epoch = { $name } 파일이 Unix 시간 이전에 수정된 것 같습니다.
core_folder_modified_before_epoch = { $name } 폴더가 Unix 시간 이전에 수정된 것 같습니다.
core_file_no_modification_date = { $name } 파일의 수정된 시각을 읽을 수 없습니다. 이유: { $reason }
core_folder_no_modification_date = { $name } 폴더의 수정된 시각을 읽을 수 없습니다. 이유: { $reason }
core_missing_no_chosen_included_directory = 적어도 1개 이상의 디렉터리가 주어져야 합니다.
core_directory_wildcard_no_supported = 디렉터리: 경로에는 와일드 카드가 지원되지 않습니다. "{ $path }"는 무시됩니다.
core_directory_must_exists = 디렉터리: 주어진 폴더 경로는 반드시 존재해야 합니다. "{ $path }"는 무시됩니다.
core_directory_must_be_directory = 디렉터리: 주어진 경로는 디렉터리를 가리켜야 합니다. "{ $path }"는 무시됩니다.
core_included_directory_zero_valid_directories = 검색 대상 디렉터리 오류: 적어도 1개 이상의 유효한 경로가 주어져야 합니다. 유효한 경로가 하나도 없습니다.
core_excluded_directory_pointless_slash = 디렉터리: "/"를 제외하는 것은 아무런 파일도 스캔하지 않는다는 것이므로, 의미가 없습니다.
core_directory_overlap = 디렉터리: 모든 주어진 경로가 검색 제외 경로와 겹칩니다.
core_directory_unable_to_get_device_id = 디렉터리: { $path }의 장치 ID를 가져올 수 없습니다.
core_ffmpeg_not_found = 유효한 FFmpeg 설치를 발견하지 못했습니다.
core_ffmpeg_not_found_windows = ffmpeg.exe와 ffprobe.exe가 시스템 변수 PATH에서 사용 가능하거나, 이 프로그램의 경로와 같은 곳에 위치하는지 확인하세요.
core_ffmpeg_missing_in_snap = 현재 ffmpeg snap에서는 유사한 영상 검색이 지원되지 않습니다. 더 많은 정보는 { $url }에서 확인하세요.
core_saving_to_cache = { $number }개의 파일을 캐시에 저장했습니다.
core_loading_from_cache = { $number }개의 파일을 캐시에서 불러왔습니다.
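The catalogs above and below are Fluent (.ftl) files, which czkawka_core loads through i18n-embed (see Cargo.toml). A minimal sketch of resolving one of these messages with the lower-level fluent-bundle crate that i18n-embed builds on. This is illustrative only: it assumes fluent-bundle and unic-langid (with its macros feature) as direct dependencies, which czkawka_core itself does not declare; the English string is copied from i18n/en above.

// Illustrative sketch; not how czkawka_core itself resolves messages.
use fluent_bundle::{FluentArgs, FluentBundle, FluentResource, FluentValue};
use unic_langid::langid;

fn main() {
    // One entry copied verbatim from i18n/en/czkawka_core.ftl.
    let ftl = "core_loading_from_cache = Loaded from cache { $number } entries".to_string();
    let resource = FluentResource::try_new(ftl).expect("failed to parse FTL");

    let mut bundle = FluentBundle::new(vec![langid!("en")]);
    bundle.add_resource(resource).expect("failed to add FTL resource");

    let message = bundle
        .get_message("core_loading_from_cache")
        .expect("message id not found");
    let mut args = FluentArgs::new();
    args.set("number", FluentValue::from(1024));

    let mut errors = vec![];
    let pattern = message.value().expect("message has no value");
    let text = bundle.format_pattern(pattern, Some(&args), &mut errors);

    // Prints "Loaded from cache 1024 entries"; the placeable is wrapped in
    // Unicode isolation marks unless bundle.set_use_isolating(false) is called.
    println!("{text}");
}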
czkawka_core-8.0.0/i18n/nl/czkawka_core.ftl
# Core
core_similarity_original = Origineel
core_similarity_very_high = Zeer hoog
core_similarity_high = hoog
core_similarity_medium = Middelgroot
core_similarity_small = Klein
core_similarity_very_small = Zeer Klein
core_similarity_minimal = Minimaal
core_cannot_open_dir = Kan dir { $dir }niet openen, reden { $reason }
core_cannot_read_entry_dir = Kan invoer niet lezen in map { $dir }, reden { $reason }
core_cannot_read_metadata_dir = Kan metadata niet lezen in map { $dir }, reden { $reason }
core_file_not_utf8_name = Bestand { $name } heeft geen geldige UTF-8-naam (sommige tekens kunnen niet worden getoond)
core_file_modified_before_epoch = Het bestand { $name } lijkt aangepast te zijn voor Unix Epoch
core_folder_modified_before_epoch = Map { $name } lijkt aangepast te zijn voor Unix Epoch
core_file_no_modification_date = Niet in staat om de datum van bestand { $name }te krijgen, reden { $reason }
core_folder_no_modification_date = Niet in staat om wijzigingsdatum van map { $name }te krijgen, reden { $reason }
core_missing_no_chosen_included_directory = Ten minste één map moet worden opgegeven
core_directory_wildcard_no_supported = Maps: Wildcards op pad worden niet ondersteund, negeer { $path }
core_directory_must_exists = Maps: Opgegeven mappad moet bestaan, afwijzend { $path }
core_directory_must_be_directory = Directories: Het opgegeven pad moet naar de map wijzen, { $path } wordt genegeerd
core_included_directory_zero_valid_directories = Inclusief map FOUT: Er is niet één juist pad gevonden naar de map die vereist is
core_excluded_directory_pointless_slash = Maps: Uitsluiten/is zinloos, omdat er geen bestanden worden gescand
core_directory_overlap = Maps: alle mappen om overlappingen te zoeken met uitgesloten mappen
core_directory_unable_to_get_device_id = Maps: Kan apparaat-id niet ophalen uit map { $path }
core_ffmpeg_not_found = Kan de juiste installatie van FFmpeg niet vinden
core_ffmpeg_not_found_windows = Zorg ervoor dat ffmpeg.exe en ffprobe.exe beschikbaar zijn in PATH of direct in dezelfde map geplaatst zijn waar de app uitvoerbaar is
core_ffmpeg_missing_in_snap = Vergelijkbare video's werken momenteel niet met snap, als je wilt helpen kijken naar - { $url }
core_saving_to_cache = Opgeslagen in bestand { $number } cache items
core_loading_from_cache = Geladen uit cache { $number } items

czkawka_core-8.0.0/i18n/no/czkawka_core.ftl
# Core
core_similarity_original = Opprinnelig
core_similarity_very_high = Veldig høy
core_similarity_high = Høy
core_similarity_medium = Middels
core_similarity_small = Liten
core_similarity_very_small = Veldig liten
core_similarity_minimal = Minimal
core_cannot_open_dir = Kan ikke åpne dir { $dir }, årsak { $reason }
core_cannot_read_entry_dir = Kan ikke lese oppføringen i dir { $dir }, årsak { $reason }
core_cannot_read_metadata_dir = Kan ikke lese metadata i dir { $dir }, årsak { $reason }
core_file_not_utf8_name = Filen { $name } har ikke et gyldig UTF-8-navn (noen tegn kan ikke vises)
core_file_modified_before_epoch = Filen { $name } ser ut til å bli endret før Unix Epoch
core_folder_modified_before_epoch = Mappen { $name } ser ut til å bli endret før Unix Epoch
core_file_no_modification_date = Klarte ikke å hente endringsdato fra filen { $name }. Årsak { $reason }
core_folder_no_modification_date = Klarte ikke å hente endringsdato fra mappen { $name }. Årsak { $reason }
core_missing_no_chosen_included_directory = Minst en katalog må angis
core_directory_wildcard_no_supported = Kataloger: Jokertegn i stien støttes ikke, ignorerer { $path }
core_directory_must_exists = Kataloger: Angitt sti for mappe må eksistere. Ignorerer { $path }
core_directory_must_be_directory = Kataloger: Angitt sti må peke på mappen. Ignorerer { $path }
core_included_directory_zero_valid_directories = Feil med inkludert katalog: Fant ikke én eneste sti til den inkluderte mappen, noe som er påkrevd
core_excluded_directory_pointless_slash = Kataloger: Ekskludere / er poengløst, fordi det betyr at ingen filer vil bli skannet
core_directory_overlap = Kataloger: Alle kataloger å søke overlapper med ekskluderte mapper
core_directory_unable_to_get_device_id = Mapper: Kan ikke hente enhets id fra mappen { $path }
core_ffmpeg_not_found = Klarte ikke å finne riktig installasjon av FFmpeg
core_ffmpeg_not_found_windows = Pass på at ffmpeg.exe og ffprobe.exe er tilgjengelig i PATH eller plasseres direkte i samme mappe som appen kan kjøres
core_ffmpeg_missing_in_snap = Lignende videoer fungerer ikke for øyeblikket med snap. Hvis du vil ha hjelp kan du se her - { $url }
core_saving_to_cache = Lagret i filen { $number } cache-oppføringer
core_loading_from_cache = Lastet fra hurtigbuffer { $number } oppføringer

czkawka_core-8.0.0/i18n/pl/czkawka_core.ftl
# Core
core_similarity_original = Oryginalny
core_similarity_very_high = Bardzo Duże
core_similarity_high = Duże
core_similarity_medium = Średnie
core_similarity_small = Małe
core_similarity_very_small = Bardzo Małe
core_similarity_minimal = Minimalne
core_cannot_open_dir = Nie można otworzyć folderu { $dir }, powód { $reason }
core_cannot_read_entry_dir = Nie można odczytać danych z folderu { $dir }, powód { $reason }
core_cannot_read_metadata_dir = Nie można odczytać metadanych folderu { $dir }, powód { $reason }
core_file_not_utf8_name = Plik { $name } nie posiada nazwy zakodowanej za pomocą UTF-8(niektóre znaki mogą się nie wyświetlać)
core_file_modified_before_epoch = Plik { $name } ma datę modyfikacji sprzed epoki unixa
core_folder_modified_before_epoch = Folder { $name } ma datę modyfikacji sprzed epoki unixa
core_file_no_modification_date = Nie udało się pobrać daty modyfikacji z pliku { $name }, powód { $reason }
core_folder_no_modification_date = Nie udało się pobrać daty modyfikacji z folderu { $name }, powód { $reason }
core_missing_no_chosen_included_directory = Należy podać co najmniej jeden katalog
core_directory_wildcard_no_supported = Katalogi: Wildcard na ścieżce nie są obsługiwane, ignorowanie { $path }
core_directory_must_exists = Katalogi: Podana ścieżka do folderu musi istnieć, ignorowanie { $path }
core_directory_must_be_directory = Katalogi: Podana ścieżka musi wskazywać na katalog, ignorowanie { $path }
core_included_directory_zero_valid_directories = Błąd katalogów do przeszukiwania: Nie znaleziono nawet jednej poprawnej ścieżki do przeszukania
core_excluded_directory_pointless_slash = Katalogi: Wykluczanie folderu / jest bezcelowe, ponieważ oznacza to, że żadne pliki nie zostaną sprawdzone
core_directory_overlap = Katalogi: Wszystkie katalogi do wyszukiwania pokrywają się z wykluczonymi
core_directory_unable_to_get_device_id = Katalogi: Nie można uzyskać identyfikatora urządzenia z folderu { $path }
core_ffmpeg_not_found = Nie można odnaleźć poprawnej instalacji FFmpeg
core_ffmpeg_not_found_windows = Upewnij się, że ffmpeg.exe i ffprobe.exe są dostępne w PATH lub są umieszczone bezpośrednio w tym samym folderze, w którym aplikacja jest uruchamiana.
core_ffmpeg_missing_in_snap = Wyszukiwanie podobnych filmów nie działa obecnie w snapach, jeśli chcesz pomóc spójrz na - { $url }
core_saving_to_cache = Zapisano do pliku { $number } obiektów
core_loading_from_cache = Załadowano z pamięci podręcznej { $number } obiektów

czkawka_core-8.0.0/i18n/pt/czkawka_core.ftl
# Core
core_similarity_original = Original
core_similarity_very_high = Muito alto
core_similarity_high = Alto
core_similarity_medium = Média
core_similarity_small = Pequeno
core_similarity_very_small = Muito Pequeno
core_similarity_minimal = Mínimo
core_cannot_open_dir = Não é possível abrir o diretório { $dir }, razão { $reason }
core_cannot_read_entry_dir = Não é possível ler a entrada no diretório { $dir }, razão { $reason }
core_cannot_read_metadata_dir = Não é possível ler os metadados no diretório { $dir }, razão { $reason }
core_file_not_utf8_name = Arquivo { $name } não tem nome UTF-8 válido (alguns caracteres não podem ser exibidos)
core_file_modified_before_epoch = Arquivo { $name } parece ser modificado antes do Epoch Unix
core_folder_modified_before_epoch = A pasta { $name } parece ser modificada antes do Epoch Unix
core_file_no_modification_date = Não foi possível obter a data de modificação do arquivo { $name }, motivo { $reason }
core_folder_no_modification_date = Não foi possível obter a data de modificação da pasta { $name }, motivo { $reason }
core_missing_no_chosen_included_directory = Pelo menos um diretório deve ser fornecido
core_directory_wildcard_no_supported = Directorias: Caracteres curinga no caminho não são suportados, ignorando { $path }
core_directory_must_exists = Directórios: Caminho da pasta fornecida deve sair, ignorando { $path }
core_directory_must_be_directory = Diretórios: Caminho fornecido deve apontar para o diretório, ignorando { $path }
core_included_directory_zero_valid_directories = ERRO do Diretório incluído: Não foi encontrado nenhum caminho correto que é necessário incluir
core_excluded_directory_pointless_slash = Directorias: Excluir / não faz sentido, porque significa que nenhum arquivo será escaneado
core_directory_overlap = Diretórios: Todos os diretórios para pesquisar sobreposições com diretórios excluídos
core_directory_unable_to_get_device_id = Directorias: Não foi possível obter o dispositivo id da pasta { $path }
core_ffmpeg_not_found = Instalação adequada do FFmpeg não encontrada
core_ffmpeg_not_found_windows = Certifique-se de que o ffmpeg.exe e ffprobe.exe estão disponíveis no PATH ou são colocados diretamente na mesma pasta onde o aplicativo é executável
core_ffmpeg_missing_in_snap = Vídeos similares não funcionam atualmente com o snap, se você quiser ajudar a olhar - { $url }
core_saving_to_cache = Salvo no arquivo { $number } entradas de cache
core_loading_from_cache = Carregado do cache { $number } entradas

czkawka_core-8.0.0/i18n/pt-BR/czkawka_core.ftl
# Core
core_similarity_original = Original
core_similarity_very_high = Muito alto
core_similarity_high = alta
core_similarity_medium = Média
core_similarity_small = Pequeno
core_similarity_very_small = Muito Pequeno
core_similarity_minimal = Mínimo
core_cannot_open_dir = Não é possível abrir o dir { $dir }, razão { $reason }
core_cannot_read_entry_dir = Não é possível ler a entrada no diretório { $dir }, razão { $reason }
core_cannot_read_metadata_dir = Não é possível ler os metadados no diretório { $dir }, razão { $reason }
core_file_not_utf8_name = O arquivo { $name } não possui um nome UTF-8 válido (alguns caracteres não podem ser exibidos)
core_file_modified_before_epoch = O arquivo { $name } parece ser modificado antes do Epoch Unix
core_folder_modified_before_epoch = Pasta { $name } parece ser modificada antes do Epoch Unix
core_file_no_modification_date = Não é possível obter a data de modificação do arquivo { $name }, motivo { $reason }
core_folder_no_modification_date = Não é possível obter a data de modificação da pasta { $name }, motivo { $reason }
core_missing_no_chosen_included_directory = Pelo menos um diretório deve ser fornecido
core_directory_wildcard_no_supported = Directorias: Caracteres curinga no caminho não são suportados, ignorando { $path }
core_directory_must_exists = Diretórios: Caminho da pasta fornecida deve existir, ignorando { $path }
core_directory_must_be_directory = Directorias: Caminho fornecido deve apontar para o diretório, ignorando { $path }
core_included_directory_zero_valid_directories = ERRO do Diretório incluído: Não foi encontrado nenhum caminho correto que é necessário incluir
core_excluded_directory_pointless_slash = Directorias: Excluir / não faz sentido, porque significa que nenhum arquivo será escaneado
core_directory_overlap = Diretórios: Todos os diretórios para pesquisar sobreposições com diretórios excluídos
core_directory_unable_to_get_device_id = Directorias: Não foi possível obter o dispositivo de ajuda da pasta { $path }
core_ffmpeg_not_found = Instalação adequada do FFmpeg não encontrada
core_ffmpeg_not_found_windows = Certifique-se de que o ffmpeg.exe e ffprobe.exe estão disponíveis no PATH ou são colocados diretamente na mesma pasta onde o aplicativo é executável
core_ffmpeg_missing_in_snap = Vídeos similares não funcionam atualmente com o snap, se você quiser ajudar a olhar - { $url }
core_saving_to_cache = Salvo no arquivo { $number } entradas de cache
core_loading_from_cache = Carregado do cache { $number } entradas

czkawka_core-8.0.0/i18n/ro/czkawka_core.ftl
# Core
core_similarity_original = Originală
core_similarity_very_high = Foarte Mare
core_similarity_high = Ridicat
core_similarity_medium = Medie
core_similarity_small = Mică
core_similarity_very_small = Foarte mic
core_similarity_minimal = Minimă
core_cannot_open_dir = Nu se poate deschide dir { $dir }, motiv { $reason }
core_cannot_read_entry_dir = Nu se poate citi intrarea în dir { $dir }, motivul { $reason }
core_cannot_read_metadata_dir = Metadatele nu pot fi citite în dir { $dir }, motivul { $reason }
core_file_not_utf8_name = Fișierul { $name } nu are un nume valid UTF-8 (este posibil ca unele caractere să nu fie afișate)
core_file_modified_before_epoch = Fișierul { $name } pare să fie modificat înainte de Epoch Unix
core_folder_modified_before_epoch = Dosarul { $name } pare să fie modificat înainte de Epoc Unix
core_file_no_modification_date = Imposibil de obținut data modificării din fișierul { $name }, motivul { $reason }
core_folder_no_modification_date = Imposibil de obținut data modificării din dosarul { $name }, motivul { $reason }
core_missing_no_chosen_included_directory = Trebuie furnizat cel puțin un director
core_directory_wildcard_no_supported = Directoare: Wildcards pe cale nu sunt acceptate, ignorând { $path }
core_directory_must_exists = Directoare: Calea dosarului furnizat trebuie să existe, ignorând { $path }
core_directory_must_be_directory = Directoare: Calea specificată trebuie să indice în director, ignorând { $path }
core_included_directory_zero_valid_directories = EROARE din Director inclusă: Nici măcar o cale corectă de inclus, care este necesară
core_excluded_directory_pointless_slash = Directoare: Excludere / este inutilă, deoarece înseamnă că niciun fișier nu va fi scanat
core_directory_overlap = Directoare: Toate directoarele pentru a căuta suprapuneri cu directoarele excluse
core_directory_unable_to_get_device_id = Directoare: Imposibil de obținut ID-ul dispozitivului din folderul { $path }
core_ffmpeg_not_found = Nu se poate găsi instalarea corectă a FFmpeg
core_ffmpeg_not_found_windows = Asigurați-vă că ffmpeg.exe și ffprobe.exe sunt disponibile în PATH sau sunt puse direct în același folder unde este executabilă aplicația
core_ffmpeg_missing_in_snap = Videoclipuri similare nu funcționează în prezent cu ancorare, dacă doriți să vă uitați - { $url }
core_saving_to_cache = Intrări cache salvate în fişierul { $number }
core_loading_from_cache = Încărcat din geocutia { $number }
czkawka_core-8.0.0/i18n/ru/czkawka_core.ftl
# Core
core_similarity_original = Оригинальное
core_similarity_very_high = Очень высокое
core_similarity_high = Высокое
core_similarity_medium = Среднее
core_similarity_small = Низкое
core_similarity_very_small = Очень низкое
core_similarity_minimal = Минимальное
core_cannot_open_dir = Невозможно открыть каталог { $dir }, причина: { $reason }
core_cannot_read_entry_dir = Невозможно прочитать запись в директории { $dir }, причина: { $reason }
core_cannot_read_metadata_dir = Невозможно прочитать метаданные в директории { $dir }, причина: { $reason }
core_file_not_utf8_name = У файла { $name } неверное имя UTF-8 (некоторые символы могут не отображаться)
core_file_modified_before_epoch = Файл { $name }, кажется, изменён до начала эпохи Unix
core_folder_modified_before_epoch = Папка { $name }, кажется, изменена до начала эпохи Unix
core_file_no_modification_date = Не удаётся получить дату изменения из файла { $name }, причина: { $reason }
core_folder_no_modification_date = Не удаётся получить дату изменения из папки { $name }, причина: { $reason }
core_missing_no_chosen_included_directory = Должен быть указан хотя бы один каталог
core_directory_wildcard_no_supported = Директории: Не поддерживаются маски в путях, будет проигнорирован { $path }
core_directory_must_exists = Директории: Указанный путь к папке должен существовать, будет проигнорирован { $path }
core_directory_must_be_directory = Директории: Указанный путь должен указывать на директорию, будет проигнорирован { $path }
core_included_directory_zero_valid_directories = Включённый каталог, ОШИБКА: Не найдено ни одного корректного пути для включения в список поиска — обязательно добавить хотя бы один
core_excluded_directory_pointless_slash = Директории: Исключение корневой папки «/» бессмысленно, потому что в таком случае ни один файл не будет просканирован
core_directory_overlap = Каталоги: Все директории для поиска также присутствуют в списке исключённых каталогов
core_directory_unable_to_get_device_id = Каталоги: Не удалось получить идентификатор устройства из папки { $path }
core_ffmpeg_not_found = Не удалось найти путь, содержащий корректную инсталляцию FFmpeg
core_ffmpeg_not_found_windows = Убедитесь, что ffmpeg.exe и ffprobe.exe доступны в PATH или находятся в той же папке, где это исполняемый файл
core_ffmpeg_missing_in_snap = Функция поиска похожих видео пока не работает — если хотите помочь проекту, см. { $url }
core_saving_to_cache = Сохранено в файл записей кэша: { $number }
core_loading_from_cache = Загружено записей из кэша: { $number }

czkawka_core-8.0.0/i18n/sv/czkawka_core.ftl
# Core
core_similarity_original = Ursprunglig
core_similarity_very_high = Mycket Hög
core_similarity_high = Hög
core_similarity_medium = Mellan
core_similarity_small = Litet
core_similarity_very_small = Väldigt Liten
core_similarity_minimal = Minimalt
core_cannot_open_dir = Kan inte öppna dir { $dir }anledning { $reason }
core_cannot_read_entry_dir = Kan inte läsa post i dir { $dir }, anledning { $reason }
core_cannot_read_metadata_dir = Kan inte läsa metadata i dir { $dir }, anledning { $reason }
core_file_not_utf8_name = Filen { $name } har inte ett giltigt UTF-8-namn (vissa tecken kan inte visas)
core_file_modified_before_epoch = Filen { $name } verkar ändras innan Unix Epoch
core_folder_modified_before_epoch = Mappen { $name } verkar ändras innan Unix Epoch
core_file_no_modification_date = Det går inte att hämta ändringsdatum från filen { $name }, anledning { $reason }
core_folder_no_modification_date = Det går inte att hämta ändringsdatum från mappen { $name }, anledning { $reason }
core_missing_no_chosen_included_directory = Minst en katalog måste tillhandahållas
core_directory_wildcard_no_supported = Kataloger: Wildcards i sökvägen stöds inte, ignorerar { $path }
core_directory_must_exists = Kataloger: Tillhandahållen mappsökväg måste finnas, ignorerar { $path }
core_directory_must_be_directory = Kataloger: Tillhandahållen sökväg måste peka på katalogen, ignorerar { $path }
core_included_directory_zero_valid_directories = Inkluderad katalog FEL: Hittas inte ens en korrekt sökväg till inkluderad som krävs
core_excluded_directory_pointless_slash = Kataloger: Exklusive / är meningslös, eftersom det innebär att inga filer kommer att skannas
core_directory_overlap = Kataloger: Alla kataloger att söka överlappar med uteslutna kataloger
core_directory_unable_to_get_device_id = Kataloger: Det går inte att hämta enhets-id från mappen { $path }
core_ffmpeg_not_found = Kan inte hitta rätt installation av FFmpeg
core_ffmpeg_not_found_windows = Se till att ffmpeg.exe och ffprobe.exe är tillgängliga i PATH eller sätts direkt till samma mapp där är app körbar
core_ffmpeg_missing_in_snap = Liknande videor fungerar inte just nu med snap, om du vill ha hjälp att titta på - { $url }
core_saving_to_cache = Sparad i filen { $number } cacheposter
core_loading_from_cache = Laddad från cache { $number } poster
core_folder_modified_before_epoch = { $name } klasörü Unix Epoch'tan önce değiştirilmiş gibi görünüyor. core_file_no_modification_date = { $name } dosyasının değişiklik tarihine erişilemiyor, nedeni: { $reason } core_folder_no_modification_date = { $name } klasörünün değişiklik tarihine erişilemiyor, nedeni: { $reason } core_missing_no_chosen_included_directory = "Aranacak Dizinler" listesinde en az bir dizin yer almalıdır. core_directory_wildcard_no_supported = Dizinler: Yol adında joker karakterler desteklenmez, { $path } yok sayıldı. core_directory_must_exists = Dizinler: Girilen klasör yolu var olmalı, { $path } yok sayıldı. core_directory_must_be_directory = Dizinler: Girilen yol bir dizini göstermelidir, { $path } yok sayıldı. core_included_directory_zero_valid_directories = "Aranacak Dizinler" listesinde HATA: Tarama yapılması için gerekli olan tek bir doğru yol bile bulunamadı. core_excluded_directory_pointless_slash = Dizinler: "/" kök dizinini hariç tutmak anlamsızdır, çünkü bu hiçbir dosyanın taranmayacağı anlamına gelir. core_directory_overlap = Dizinler: Aranacak tüm dizinler, hariç tutulan dizinlerle çakışıyor. core_directory_unable_to_get_device_id = Dizinler: { $path } klasörünün aygıt kimliği bilgisine erişilemiyor. core_ffmpeg_not_found = FFmpeg'in uygun kurulumu bulunamıyor. core_ffmpeg_not_found_windows = "ffmpeg(.exe)" ve "ffprobe(.exe)" uygulamalarının PATH dizininde ya da uygulamanın doğrudan yürütüldüğü dizinde yer aldığından ve 'yürütülebilir' olarak işaretlendiğinden emin olun. core_ffmpeg_missing_in_snap = Benzer Videolar şu anda snap ile çalışmıyor, eğer yardım istiyorsanız - { $url } core_saving_to_cache = { $number } adet önbellek kaydı dosyaya kaydedildi core_loading_from_cache = Önbellekten { $number } adet kayıt yüklendi czkawka_core-8.0.0/i18n/uk/czkawka_core.ftl000064400000000000000000000071231046102023000166470ustar 00000000000000# Core core_similarity_original = Оригінал core_similarity_very_high = Дуже висока core_similarity_high = Висока core_similarity_medium = Середня core_similarity_small = Низька core_similarity_very_small = Дуже низька core_similarity_minimal = Мінімальна core_cannot_open_dir = Не вдалося відкрити каталог { $dir }, причина: { $reason } core_cannot_read_entry_dir = Не вдалося прочитати запис в каталозі { $dir }, причина: { $reason } core_cannot_read_metadata_dir = Не вдалося прочитати метадані в каталозі { $dir }, причина: { $reason } core_file_not_utf8_name = Файл { $name } не має припустимого імені UTF-8 (деякі символи не можуть бути показані) core_file_modified_before_epoch = Файл { $name }, здається, змінено до початку епохи Unix core_folder_modified_before_epoch = Папка { $name }, здається, змінена до початку епохи Unix core_file_no_modification_date = Не вдалося отримати дату модифікації з файлу { $name }, причина: { $reason } core_folder_no_modification_date = Не вдалося отримати дату модифікації з каталогу { $name }, причина: { $reason } core_missing_no_chosen_included_directory = Необхідно вказати принаймні один каталог core_directory_wildcard_no_supported = Директорії: Не підтримуються маски у шляхах, буде проігнорован { $path } core_directory_must_exists = Директорії: Вказаний шлях до папки має існувати, буде проігнорован { $path } core_directory_must_be_directory = Директорії: Вказаний шлях повинен вказувати на директорію, буде проігнорован { $path } core_included_directory_zero_valid_directories = Включений каталог, ПОМИЛКА: Не знайдено жодного коректного шляху для включення до списку пошуку — обов'язково додати хоча 
б один core_excluded_directory_pointless_slash = Директорії: Виключення кореневого каталогу «/» не має сенсу, тому що в такому разі жоден файл не буде просканований core_directory_overlap = Каталоги: Усі директорії для пошуку також присутні у списку виключених каталогів core_directory_unable_to_get_device_id = Каталоги: Не вдалося отримати ідентифікатор пристрою з папки { $path } core_ffmpeg_not_found = Неможливо знайти шлях, що містить коректну інсталяцію FFmpeg core_ffmpeg_not_found_windows = Будьте впевнені, що ffmpeg.exe і ffprobe.exe доступні в PATH або прямо в тій же папці, де є виконуваний додаток core_ffmpeg_missing_in_snap = Функція пошуку схожих відео поки не працює — якщо хочете допомогти проекту, див. {$url} core_saving_to_cache = Збережено записів кешу у файл: { $number } core_loading_from_cache = Завантажено записів з кешу: { $number } czkawka_core-8.0.0/i18n/zh/czkawka_core.ftl000064400000000000000000000042451046102023000166530ustar 00000000000000# Core core_similarity_original = 原版 core_similarity_very_high = 非常高 core_similarity_high = 高 core_similarity_medium = 中 core_similarity_small = 小的 core_similarity_very_small = 非常小 core_similarity_minimal = 最小化 core_cannot_open_dir = 无法打开目录 { $dir },因为 { $reason } core_cannot_read_entry_dir = 无法在目录 { $dir } 中读取条目,因为 { $reason } core_cannot_read_metadata_dir = 无法读取目录 { $dir } 中的元数据,因为 { $reason } core_file_not_utf8_name = 文件 { $name } 没有有效的 UTF-8 名称 (可能无法显示一些字符) core_file_modified_before_epoch = 文件 { $name } 似乎在 Unix Epoch 之前被修改过 core_folder_modified_before_epoch = 文件夹 { $name } 似乎在 Unix Epoch 之前被修改过 core_file_no_modification_date = 无法从文件 { $name } 获取修改日期,因为 { $reason } core_folder_no_modification_date = 无法从文件夹 { $name } 获取修改日期,因为 { $reason } core_missing_no_chosen_included_directory = 必须至少提供一个目录 core_directory_wildcard_no_supported = 目录:不支持路径中的通配符,忽略 { $path } core_directory_must_exists = 目录:提供的文件夹路径必须退出,忽略 { $path } core_directory_must_be_directory = 目录:提供的路径必须指向目录,忽略 { $path } core_included_directory_zero_valid_directories = 包括目录错误:即使找不到一个需要包含的正确路径 core_excluded_directory_pointless_slash = 目录:不包括 / 无意义,因为它意味着没有文件将被扫描 core_directory_overlap = 目录:所有要搜索与排除目录重叠的目录 core_directory_unable_to_get_device_id = 目录:无法从文件夹 { $path } 获取设备 id core_ffmpeg_not_found = FFmpeg未被正确安装 core_ffmpeg_not_found_windows = 请确保 ffmpeg.exe 和 ffprobe.exe 在 PATH 中可用,或者直接放入应用可执行文件的同一文件夹中 core_ffmpeg_missing_in_snap = 类似的视频目前不适用于快照,如果您想要帮助查看- { $url } core_saving_to_cache = 保存到文件 { $number } 个缓存条目 core_loading_from_cache = 从缓存加载 { $number } 个条目 czkawka_core-8.0.0/i18n.toml000064400000000000000000000007111046102023000137470ustar 00000000000000# (Required) The language identifier of the language used in the # source code for gettext system, and the primary fallback language # (for which all strings must be present) when using the fluent # system. fallback_language = "en" # Use the fluent localization system. [fluent] # (Required) The path to the assets directory. 
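# In this crate the assets directory is `i18n`, so, for example, the Russian
# strings for the `czkawka_core` domain live in `i18n/ru/czkawka_core.ftl`.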
# The paths inside the assets directory should be structured like so: # `assets_dir/{language}/{domain}.ftl` assets_dir = "i18n" czkawka_core-8.0.0/src/bad_extensions.rs000064400000000000000000000413241046102023000164420ustar 00000000000000use std::collections::{BTreeSet, HashMap}; use std::io::prelude::*; use std::mem; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use log::debug; use mime_guess::get_mime_extensions; use rayon::prelude::*; use serde::Serialize; use crate::common::{check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType}; use crate::common_tool::{CommonData, CommonToolData}; use crate::common_traits::*; use crate::progress_data::{CurrentStage, ProgressData}; static DISABLED_EXTENSIONS: &[&str] = &["file", "cache", "bak", "data"]; // Such files can have any type inside // This adds several workarounds for bugs/invalid recognizing types by external libraries // ("real_content_extension", "current_file_extension") const WORKAROUNDS: &[(&str, &str)] = &[ // Wine/Windows ("der", "cat"), ("exe", "acm"), ("exe", "ax"), ("exe", "bck"), ("exe", "com"), ("exe", "cpl"), ("exe", "dll16"), ("exe", "dll"), ("exe", "drv16"), ("exe", "drv"), ("exe", "ds"), ("exe", "efi"), ("exe", "exe16"), ("exe", "fon"), // Type of font or something else ("exe", "mod16"), ("exe", "msstyles"), ("exe", "mui"), ("exe", "mun"), ("exe", "orig"), ("exe", "ps1xml"), ("exe", "rll"), ("exe", "rs"), ("exe", "scr"), ("exe", "signed"), ("exe", "sys"), ("exe", "tlb"), ("exe", "tsp"), ("exe", "vdm"), ("exe", "vxd"), ("exe", "winmd"), ("gz", "loggz"), ("xml", "adml"), ("xml", "admx"), ("xml", "camp"), ("xml", "cdmp"), ("xml", "cdxml"), ("xml", "dgml"), ("xml", "diagpkg"), ("xml", "gmmp"), ("xml", "library-ms"), ("xml", "man"), ("xml", "manifest"), ("xml", "msc"), ("xml", "mum"), ("xml", "resx"), ("zip", "wmz"), // Games specific extensions - cannot be used here common extensions like zip ("gz", "h3m"), // Heroes 3 ("zip", "hashdb"), // Gog ("c2", "zip"), // King of the Dark Age ("c2", "bmp"), // King of the Dark Age ("c2", "avi"), // King of the Dark Age ("c2", "exe"), // King of the Dark Age // Raw images ("tif", "nef"), ("tif", "dng"), ("tif", "arw"), // Other ("der", "keystore"), // Godot/Android keystore ("exe", "pyd"), // Python/Mingw ("gz", "blend"), // Blender ("gz", "crate"), // Cargo ("gz", "svgz"), // Archive svg ("gz", "tgz"), // Archive ("html", "dtd"), // Mingw ("html", "ent"), // Mingw ("html", "md"), // Markdown ("html", "svelte"), // Svelte ("jpg", "jfif"), // Photo format ("m4v", "mp4"), // m4v and mp4 are interchangeable ("mobi", "azw3"), // Ebook format ("mpg", "vob"), // Weddings in parts have usually vob extension ("obj", "bin"), // Multiple apps, Czkawka, Nvidia, Windows ("obj", "o"), // Compilators ("odp", "otp"), // LibreOffice ("ods", "ots"), // Libreoffice ("odt", "ott"), // Libreoffice ("ogg", "ogv"), // Audio format ("pem", "key"), // curl, openssl ("png", "kpp"), // Krita presets ("pptx", "ppsx"), // Powerpoint ("sh", "bash"), // Linux ("sh", "guess"), // GNU ("sh", "lua"), // Lua ("sh", "js"), // Javascript ("sh", "pl"), // Gnome/Linux ("sh", "pm"), // Gnome/Linux ("sh", "py"), // Python ("sh", "pyx"), // Python ("sh", "rs"), // Rust ("sh", "sample"), // Git ("xml", "bsp"), // Quartus ("xml", "cbp"), // CodeBlocks config 
("xml", "cfg"), // Multiple apps - Godot ("xml", "cmb"), // Cambalache ("xml", "conf"), // Multiple apps - Python ("xml", "config"), // Multiple apps - QT Creator ("xml", "dae"), // 3D models ("xml", "docbook"), // ("xml", "fb2"), // ("xml", "filters"), // Visual studio ("xml", "gir"), // GTK ("xml", "glade"), // Glade ("xml", "iml"), // Intelij Idea ("xml", "kdenlive"), // KDenLive ("xml", "lang"), // ? ("xml", "nuspec"), // Nuget ("xml", "policy"), // SystemD ("xml", "qsys"), // Quartus ("xml", "sopcinfo"), // Quartus ("xml", "svg"), // SVG ("xml", "ui"), // Cambalache, Glade ("xml", "user"), // Qtcreator ("xml", "vbox"), // VirtualBox ("xml", "vbox-prev"), // VirtualBox ("xml", "vcproj"), // VisualStudio ("xml", "vcxproj"), // VisualStudio ("xml", "xba"), // Libreoffice ("xml", "xcd"), // Libreoffice files ("zip", "apk"), // Android apk ("zip", "cbr"), // Comics ("zip", "dat"), // Multiple - python, brave ("zip", "doc"), // Word ("zip", "docx"), // Word ("zip", "jar"), // Java ("zip", "kra"), // Krita ("zip", "kgm"), // Krita ("zip", "nupkg"), // Nuget packages ("zip", "odg"), // Libreoffice ("zip", "pptx"), // Powerpoint ("zip", "whl"), // Python packages ("zip", "xlsx"), // Excel ("zip", "xpi"), // Firefox extensions ("zip", "zcos"), // Scilab // Probably invalid ("html", "svg"), ("xml", "html"), // Probably bug in external library ("msi", "ppt"), // Not sure why ppt is not recognized ("msi", "doc"), // Not sure why doc is not recognized ("exe", "xls"), // Not sure why xls is not recognized ]; #[derive(Clone, Serialize, Debug)] pub struct BadFileEntry { pub path: PathBuf, pub modified_date: u64, pub size: u64, pub current_extension: String, pub proper_extensions_group: String, pub proper_extension: String, } impl ResultEntry for BadFileEntry { fn get_path(&self) -> &Path { &self.path } fn get_modified_date(&self) -> u64 { self.modified_date } fn get_size(&self) -> u64 { self.size } } #[derive(Default)] pub struct Info { pub number_of_files_with_bad_extension: usize, } pub struct BadExtensionsParameters { pub include_files_without_extension: bool, } impl BadExtensionsParameters { pub fn new() -> Self { Self { include_files_without_extension: false, } // TODO add option to all modes } } impl Default for BadExtensionsParameters { fn default() -> Self { Self::new() } } pub struct BadExtensions { common_data: CommonToolData, information: Info, files_to_check: Vec, bad_extensions_files: Vec, params: BadExtensionsParameters, } impl BadExtensions { pub fn new(params: BadExtensionsParameters) -> Self { Self { common_data: CommonToolData::new(ToolType::BadExtensions), information: Info::default(), files_to_check: Default::default(), bad_extensions_files: Default::default(), params, } } #[fun_time(message = "find_bad_extensions_files", level = "info")] pub fn find_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) { self.prepare_items(); if !self.check_files(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } if !self.look_for_bad_extensions_files(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } self.debug_print(); } #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() .common_data(&self.common_data) .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .build() .run(); match result { 
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                self.files_to_check = grouped_file_entries.into_values().flatten().collect();
                self.common_data.text_messages.warnings.extend(warnings);
                true
            }
            DirTraversalResult::Stopped => false,
        }
    }

    #[fun_time(message = "look_for_bad_extensions_files", level = "debug")]
    fn look_for_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.files_to_check.is_empty() {
            return true;
        }

        let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::BadExtensionsChecking, self.files_to_check.len(), self.get_test_type());

        let files_to_check = mem::take(&mut self.files_to_check);

        let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default();
        for (proper, found) in WORKAROUNDS {
            hashmap_workarounds.entry(found).or_default().push(proper);
        }

        self.bad_extensions_files = self.verify_extensions(files_to_check, &atomic_counter, stop_receiver, &check_was_stopped, &hashmap_workarounds);

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        // Break if stop was clicked
        if check_was_stopped.load(Ordering::Relaxed) {
            return false;
        }

        self.information.number_of_files_with_bad_extension = self.bad_extensions_files.len();

        debug!("Found {} files with invalid extension.", self.information.number_of_files_with_bad_extension);
        true
    }

    #[fun_time(message = "verify_extensions", level = "debug")]
    fn verify_extensions(
        &self,
        files_to_check: Vec<FileEntry>,
        atomic_counter: &Arc<AtomicUsize>,
        stop_receiver: Option<&Receiver<()>>,
        check_was_stopped: &AtomicBool,
        hashmap_workarounds: &HashMap<&str, Vec<&str>>,
    ) -> Vec<BadFileEntry> {
        files_to_check
            .into_par_iter()
            .map(|file_entry| {
                atomic_counter.fetch_add(1, Ordering::Relaxed);
                if check_if_stop_received(stop_receiver) {
                    check_was_stopped.store(true, Ordering::Relaxed);
                    return None;
                }

                // Check what the file content actually is
                let kind = match infer::get_from_path(&file_entry.path) {
                    Ok(k) => match k {
                        Some(t) => t,
                        None => return Some(None),
                    },
                    Err(_) => return Some(None),
                };
                let proper_extension = kind.extension();

                let Some(current_extension) = self.get_and_validate_extension(&file_entry, proper_extension) else {
                    return Some(None);
                };

                // Check all extensions that this file could use (not sure if it is worth doing)
                let (mut all_available_extensions, valid_extensions) = self.check_for_all_extensions_that_file_can_use(hashmap_workarounds, &current_extension, proper_extension);

                if all_available_extensions.is_empty() {
                    // Did not find any matching extension
                    return Some(None);
                } else if current_extension.is_empty() {
                    if !self.params.include_files_without_extension {
                        return Some(None);
                    }
                } else if all_available_extensions.take(&current_extension).is_some() {
                    // The current extension is already a proper one
                    return Some(None);
                }

                Some(Some(BadFileEntry {
                    path: file_entry.path,
                    modified_date: file_entry.modified_date,
                    size: file_entry.size,
                    current_extension,
                    proper_extensions_group: valid_extensions,
                    proper_extension: proper_extension.to_string(),
                }))
            })
            .while_some()
            .flatten()
            .collect::<Vec<_>>()
    }

    #[allow(clippy::unused_self)]
    fn get_and_validate_extension(&self, file_entry: &FileEntry, proper_extension: &str) -> Option<String> {
        let current_extension;
        // Extract current extension from file
        if let Some(extension) = file_entry.path.extension() {
            let extension = extension.to_string_lossy().to_lowercase();
            if DISABLED_EXTENSIONS.contains(&extension.as_str()) {
                return None;
            }
            // Text longer than 10 characters is not considered
as extension if extension.len() > 10 { current_extension = String::new(); } else { current_extension = extension; } } else { current_extension = String::new(); } // Already have proper extension, no need to do more things if current_extension == proper_extension { return None; } Some(current_extension) } fn check_for_all_extensions_that_file_can_use( &self, hashmap_workarounds: &HashMap<&str, Vec<&str>>, current_extension: &str, proper_extension: &str, ) -> (BTreeSet, String) { let mut all_available_extensions: BTreeSet = Default::default(); // TODO Isn't this a bug? // Why to file without extensions we set this as empty let valid_extensions = if current_extension.is_empty() { String::new() } else { for mim in mime_guess::from_ext(proper_extension) { if let Some(all_ext) = get_mime_extensions(&mim) { for ext in all_ext { all_available_extensions.insert((*ext).to_string()); } } } // Workarounds if let Some(vec_pre) = hashmap_workarounds.get(current_extension) { for pre in vec_pre { if all_available_extensions.contains(*pre) { all_available_extensions.insert(current_extension.to_string()); break; } } } let mut guessed_multiple_extensions = format!("({proper_extension}) - "); for ext in &all_available_extensions { guessed_multiple_extensions.push_str(ext); guessed_multiple_extensions.push(','); } guessed_multiple_extensions.pop(); guessed_multiple_extensions }; (all_available_extensions, valid_extensions) } } impl DebugPrint for BadExtensions { fn debug_print(&self) { if !cfg!(debug_assertions) { return; } println!("---------------DEBUG PRINT---------------"); self.debug_print_common(); println!("-----------------------------------------"); } } impl PrintResults for BadExtensions { fn write_results(&self, writer: &mut T) -> std::io::Result<()> { writeln!( writer, "Results of searching {:?} with excluded directories {:?} and excluded items {:?}", self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.get_excluded_items() )?; writeln!(writer, "Found {} files with invalid extension.\n", self.information.number_of_files_with_bad_extension)?; for file_entry in &self.bad_extensions_files { writeln!(writer, "\"{}\" ----- {}", file_entry.path.to_string_lossy(), file_entry.proper_extensions_group)?; } Ok(()) } fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> { self.save_results_to_file_as_json_internal(file_name, &self.bad_extensions_files, pretty_print) } } impl BadExtensions { pub const fn get_bad_extensions_files(&self) -> &Vec { &self.bad_extensions_files } pub fn get_params(&self) -> &BadExtensionsParameters { &self.params } pub const fn get_information(&self) -> &Info { &self.information } } impl CommonData for BadExtensions { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } } czkawka_core-8.0.0/src/big_file.rs000064400000000000000000000142221046102023000151720ustar 00000000000000use std::fs; use std::io::Write; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use humansize::{format_size, BINARY}; use log::debug; use rayon::prelude::*; use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults}; use crate::progress_data::ProgressData; #[derive(Copy, Clone, Eq, PartialEq)] pub enum SearchMode { BiggestFiles, 
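    // Keeps the N smallest files instead of the N biggest ones.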
SmallestFiles, } #[derive(Debug, Default)] pub struct Info { pub number_of_real_files: usize, } pub struct BigFileParameters { pub number_of_files_to_check: usize, pub search_mode: SearchMode, } impl BigFileParameters { pub fn new(number_of_files: usize, search_mode: SearchMode) -> Self { let number_of_files_to_check = if number_of_files == 0 { 1 } else { number_of_files }; Self { number_of_files_to_check, search_mode, } } } pub struct BigFile { common_data: CommonToolData, information: Info, big_files: Vec, params: BigFileParameters, } impl BigFile { pub fn new(params: BigFileParameters) -> Self { Self { common_data: CommonToolData::new(ToolType::BigFile), information: Info::default(), big_files: Default::default(), params, } } #[fun_time(message = "find_big_files", level = "info")] pub fn find_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) { self.prepare_items(); if !self.look_for_big_files(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } self.delete_files(); self.debug_print(); } #[fun_time(message = "look_for_big_files", level = "debug")] fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .common_data(&self.common_data) .minimal_file_size(1) .maximal_file_size(u64::MAX) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { let mut all_files = grouped_file_entries.into_values().flatten().collect::>(); all_files.par_sort_unstable_by_key(|fe| fe.size); if self.get_params().search_mode == SearchMode::BiggestFiles { all_files.reverse(); } if all_files.len() > self.get_params().number_of_files_to_check { all_files.truncate(self.get_params().number_of_files_to_check); } self.big_files = all_files; self.common_data.text_messages.warnings.extend(warnings); self.information.number_of_real_files = self.big_files.len(); debug!("check_files - Found {} biggest/smallest files.", self.big_files.len()); true } DirTraversalResult::Stopped => false, } } fn delete_files(&mut self) { match self.common_data.delete_method { DeleteMethod::Delete => { for file_entry in &self.big_files { if fs::remove_file(&file_entry.path).is_err() { self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string()); } } } DeleteMethod::None => { //Just do nothing } _ => unreachable!(), } } } impl DebugPrint for BigFile { fn debug_print(&self) { if !cfg!(debug_assertions) { return; } println!("### INDIVIDUAL DEBUG PRINT ###"); println!("Info: {:?}", self.information); println!("Number of files to check - {}", self.get_params().number_of_files_to_check); self.debug_print_common(); println!("-----------------------------------------"); } } impl PrintResults for BigFile { fn write_results(&self, writer: &mut T) -> std::io::Result<()> { writeln!( writer, "Results of searching {:?} with excluded directories {:?} and excluded items {:?}", self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.get_excluded_items() )?; if self.information.number_of_real_files != 0 { if self.get_params().search_mode == SearchMode::BiggestFiles { writeln!(writer, "{} the biggest files.\n\n", self.information.number_of_real_files)?; } else { writeln!(writer, "{} the smallest files.\n\n", self.information.number_of_real_files)?; } for 
file_entry in &self.big_files { writeln!( writer, "{} ({}) - \"{}\"", format_size(file_entry.size, BINARY), file_entry.size, file_entry.path.to_string_lossy() )?; } } else { writeln!(writer, "Not found any files.")?; } Ok(()) } fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> { self.save_results_to_file_as_json_internal(file_name, &self.big_files, pretty_print) } } impl CommonData for BigFile { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } } impl BigFile { pub const fn get_big_files(&self) -> &Vec { &self.big_files } pub const fn get_information(&self) -> &Info { &self.information } pub fn get_params(&self) -> &BigFileParameters { &self.params } } czkawka_core-8.0.0/src/broken_files.rs000064400000000000000000000451231046102023000161000ustar 00000000000000use std::collections::{BTreeMap, HashSet}; use std::fs::File; use std::io::prelude::*; use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; use std::{fs, mem, panic}; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use log::debug; use pdf::file::FileOptions; use pdf::object::ParseOptions; use pdf::PdfError; use pdf::PdfError::Try; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use crate::common::{ check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS, }; use crate::common_cache::{extract_loaded_cache, get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; use crate::progress_data::{CurrentStage, ProgressData}; #[derive(Clone, Serialize, Deserialize, Debug)] pub struct BrokenEntry { pub path: PathBuf, pub modified_date: u64, pub size: u64, pub type_of_file: TypeOfFile, pub error_string: String, } impl ResultEntry for BrokenEntry { fn get_path(&self) -> &Path { &self.path } fn get_modified_date(&self) -> u64 { self.modified_date } fn get_size(&self) -> u64 { self.size } } impl FileEntry { fn into_broken_entry(self) -> BrokenEntry { BrokenEntry { size: self.size, path: self.path, modified_date: self.modified_date, type_of_file: TypeOfFile::Unknown, error_string: String::new(), } } } #[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] pub enum TypeOfFile { Unknown = -1, Image = 0, ArchiveZip, Audio, PDF, } bitflags! 
{ #[derive(PartialEq, Copy, Clone, Debug)] pub struct CheckedTypes : u32 { const NONE = 0; const PDF = 0b1; const AUDIO = 0b10; const IMAGE = 0b100; const ARCHIVE = 0b1000; } } #[derive(Default)] pub struct Info { pub number_of_broken_files: usize, } pub struct BrokenFilesParameters { pub checked_types: CheckedTypes, } impl BrokenFilesParameters { pub fn new(checked_types: CheckedTypes) -> Self { Self { checked_types } } } pub struct BrokenFiles { common_data: CommonToolData, information: Info, files_to_check: BTreeMap, broken_files: Vec, params: BrokenFilesParameters, } impl BrokenFiles { pub fn new(params: BrokenFilesParameters) -> Self { Self { common_data: CommonToolData::new(ToolType::BrokenFiles), information: Info::default(), files_to_check: Default::default(), broken_files: Default::default(), params, } } #[fun_time(message = "find_broken_files", level = "info")] pub fn find_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) { self.prepare_items(); if !self.check_files(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } if !self.look_for_broken_files(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } self.delete_files(); self.debug_print(); } #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let zip_extensions = ZIP_FILES_EXTENSIONS.iter().collect::>(); let audio_extensions = AUDIO_FILES_EXTENSIONS.iter().collect::>(); let pdf_extensions = PDF_FILES_EXTENSIONS.iter().collect::>(); let images_extensions = IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().collect::>(); let mut extensions = Vec::new(); let vec_extensions = [ (CheckedTypes::PDF, PDF_FILES_EXTENSIONS), (CheckedTypes::AUDIO, AUDIO_FILES_EXTENSIONS), (CheckedTypes::ARCHIVE, ZIP_FILES_EXTENSIONS), (CheckedTypes::IMAGE, IMAGE_RS_BROKEN_FILES_EXTENSIONS), ]; for (checked_type, extensions_to_add) in &vec_extensions { if self.get_params().checked_types.contains(*checked_type) { extensions.extend_from_slice(extensions_to_add); } } self.common_data.extensions.set_and_validate_allowed_extensions(&extensions); if !self.common_data.extensions.set_any_extensions() { return true; } let result = DirTraversalBuilder::new() .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .common_data(&self.common_data) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.files_to_check = grouped_file_entries .into_values() .flatten() .map(|fe| { let mut broken_entry = fe.into_broken_entry(); broken_entry.type_of_file = check_extension_availability(broken_entry.get_path(), &images_extensions, &zip_extensions, &audio_extensions, &pdf_extensions); (broken_entry.path.to_string_lossy().to_string(), broken_entry) }) .collect(); self.common_data.text_messages.warnings.extend(warnings); debug!("check_files - Found {} files to check.", self.files_to_check.len()); true } DirTraversalResult::Stopped => false, } } fn check_broken_image(&self, mut file_entry: BrokenEntry) -> Option { let mut file_entry_clone = file_entry.clone(); panic::catch_unwind(|| { if let Err(e) = image::open(&file_entry.path) { let error_string = e.to_string(); // This error is a problem with image library, remove check when https://github.com/image-rs/jpeg-decoder/issues/130 will be fixed if error_string.contains("spectral selection is not allowed in non-progressive scan") { return None; } 
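                // Any other decoding error is treated as a genuinely broken image.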
file_entry.error_string = error_string;
            }
            Some(file_entry)
        })
        .unwrap_or_else(|_| {
            let message = create_crash_message("Image-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/image-rs/image");
            println!("{message}");
            file_entry_clone.error_string = message;
            Some(file_entry_clone)
        })
    }

    fn check_broken_zip(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
        match File::open(&file_entry.path) {
            Ok(file) => {
                if let Err(e) = zip::ZipArchive::new(file) {
                    file_entry.error_string = e.to_string();
                }
                Some(file_entry)
            }
            Err(_inspected) => None,
        }
    }

    fn check_broken_audio(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
        match File::open(&file_entry.path) {
            Ok(file) => {
                let mut file_entry_clone = file_entry.clone();

                panic::catch_unwind(|| {
                    if let Err(e) = audio_checker::parse_audio_file(file) {
                        file_entry.error_string = e.to_string();
                    }
                    Some(file_entry)
                })
                .unwrap_or_else(|_| {
                    let message = create_crash_message("Symphonia", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdeljanov/Symphonia");
                    println!("{message}");
                    file_entry_clone.error_string = message;
                    Some(file_entry_clone)
                })
            }
            Err(_inspected) => None,
        }
    }

    fn check_broken_pdf(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
        let parser_options = ParseOptions::tolerant(); // Only report files with really serious errors as broken

        let mut file_entry_clone = file_entry.clone();
        panic::catch_unwind(|| {
            match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
                Ok(file) => {
                    for idx in 0..file.num_pages() {
                        if let Err(e) = file.get_page(idx) {
                            let err = validate_pdf_error(&mut file_entry, e);
                            if let PdfError::InvalidPassword = err {
                                return None;
                            }
                            break;
                        }
                    }
                }
                Err(e) => {
                    if let PdfError::Io { .. } = e {
                        return None;
                    }
                    let err = validate_pdf_error(&mut file_entry, e);
                    if let PdfError::InvalidPassword = err {
                        return None;
                    }
                }
            }
            Some(file_entry)
        })
        .unwrap_or_else(|_| {
            let message = create_crash_message("PDF-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdf-rs/pdf");
            println!("{message}");
            file_entry_clone.error_string = message;
            Some(file_entry_clone)
        })
    }

    #[fun_time(message = "load_cache", level = "debug")]
    fn load_cache(&mut self) -> (BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>) {
        let loaded_hash_map;

        let mut records_already_cached: BTreeMap<String, BrokenEntry> = Default::default();
        let mut non_cached_files_to_check: BTreeMap<String, BrokenEntry> = Default::default();
        let files_to_check = mem::take(&mut self.files_to_check);

        if self.common_data.use_cache {
            let (messages, loaded_items) =
                load_cache_from_file_generalized_by_path::<BrokenEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
            self.get_text_messages_mut().extend_with_another_messages(messages);
            loaded_hash_map = loaded_items.unwrap_or_default();
            extract_loaded_cache(&loaded_hash_map, files_to_check, &mut records_already_cached, &mut non_cached_files_to_check);
        } else {
            loaded_hash_map = Default::default();
            non_cached_files_to_check = files_to_check;
        }
        (loaded_hash_map, records_already_cached, non_cached_files_to_check)
    }

    #[fun_time(message = "look_for_broken_files", level = "debug")]
    fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.files_to_check.is_empty() {
            return true;
        }

        let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache();

        let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::BrokenFilesChecking, non_cached_files_to_check.len(),
self.get_test_type()); debug!("look_for_broken_files - started finding for broken files"); let mut vec_file_entry: Vec = non_cached_files_to_check .into_par_iter() .map(|(_, file_entry)| { atomic_counter.fetch_add(1, Ordering::Relaxed); if check_if_stop_received(stop_receiver) { return None; } match file_entry.type_of_file { TypeOfFile::Image => Some(self.check_broken_image(file_entry)), TypeOfFile::ArchiveZip => Some(self.check_broken_zip(file_entry)), TypeOfFile::Audio => Some(self.check_broken_audio(file_entry)), TypeOfFile::PDF => Some(self.check_broken_pdf(file_entry)), // This means that cache read invalid value because maybe cache comes from different czkawka version TypeOfFile::Unknown => Some(None), } }) .while_some() .flatten() .collect::>(); debug!("look_for_broken_files - ended finding for broken files"); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Just connect loaded results with already calculated vec_file_entry.extend(records_already_cached.into_values()); self.save_to_cache(&vec_file_entry, loaded_hash_map); self.broken_files = vec_file_entry .into_par_iter() .filter_map(|f| if f.error_string.is_empty() { None } else { Some(f) }) .collect(); self.information.number_of_broken_files = self.broken_files.len(); debug!("Found {} broken files.", self.information.number_of_broken_files); // Clean unused data self.files_to_check = Default::default(); true } #[fun_time(message = "save_to_cache", level = "debug")] fn save_to_cache(&mut self, vec_file_entry: &[BrokenEntry], loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results let mut all_results: BTreeMap = Default::default(); for file_entry in vec_file_entry.iter().cloned() { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } for (_name, file_entry) in loaded_hash_map { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } let messages = save_cache_to_file_generalized(&get_broken_files_cache_file(), &all_results, self.common_data.save_also_as_json, 0); self.get_text_messages_mut().extend_with_another_messages(messages); } } #[fun_time(message = "delete_files", level = "debug")] fn delete_files(&mut self) { match self.common_data.delete_method { DeleteMethod::Delete => { for file_entry in &self.broken_files { if fs::remove_file(&file_entry.path).is_err() { self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string()); } } } DeleteMethod::None => { //Just do nothing } _ => { unreachable!() } } } } impl BrokenFiles { pub const fn get_broken_files(&self) -> &Vec { &self.broken_files } pub fn get_params(&self) -> &BrokenFilesParameters { &self.params } pub const fn get_information(&self) -> &Info { &self.information } } impl DebugPrint for BrokenFiles { fn debug_print(&self) { if !cfg!(debug_assertions) { return; } self.debug_print_common(); } } impl PrintResults for BrokenFiles { fn write_results(&self, writer: &mut T) -> std::io::Result<()> { writeln!( writer, "Results of searching {:?} with excluded directories {:?} and excluded items {:?}", self.common_data.directories.included_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.get_excluded_items() )?; if !self.broken_files.is_empty() { writeln!(writer, "Found {} broken files.", self.information.number_of_broken_files)?; for file_entry in &self.broken_files { writeln!(writer, "\"{}\" - {}", 
file_entry.path.to_string_lossy(), file_entry.error_string)?; } } else { write!(writer, "Not found any broken files.")?; } Ok(()) } fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> { self.save_results_to_file_as_json_internal(file_name, &self.broken_files, pretty_print) } } fn check_extension_availability( full_name: &Path, images_extensions: &HashSet<&&'static str>, zip_extensions: &HashSet<&&'static str>, audio_extensions: &HashSet<&&'static str>, pdf_extensions: &HashSet<&&'static str>, ) -> TypeOfFile { let Some(extension) = full_name.extension() else { debug_assert!(false, "Missing extension"); return TypeOfFile::Unknown; }; let Some(extension_str) = extension.to_str() else { debug_assert!(false, "Extension not really fully str"); return TypeOfFile::Unknown; }; let extension_lowercase = extension_str.to_ascii_lowercase(); if images_extensions.contains(&extension_lowercase.as_str()) { TypeOfFile::Image } else if zip_extensions.contains(&extension_lowercase.as_str()) { TypeOfFile::ArchiveZip } else if audio_extensions.contains(&extension_lowercase.as_str()) { TypeOfFile::Audio } else if pdf_extensions.contains(&extension_lowercase.as_str()) { TypeOfFile::PDF } else { eprintln!("File with unknown extension: \"{}\" - {extension_lowercase}", full_name.to_string_lossy()); debug_assert!(false, "File with unknown extension"); TypeOfFile::Unknown } } fn unpack_pdf_error(e: PdfError) -> PdfError { if let Try { file: _, line: _, column: _, context: _, source, } = e { unpack_pdf_error(*source) } else { e } } fn validate_pdf_error(file_entry: &mut BrokenEntry, e: PdfError) -> PdfError { let mut error_string = e.to_string(); // Workaround for strange error message https://github.com/qarmin/czkawka/issues/898 if error_string.starts_with("Try at") { if let Some(start_index) = error_string.find("/pdf-") { error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]); } } file_entry.error_string = error_string; unpack_pdf_error(e) } impl CommonData for BrokenFiles { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } } czkawka_core-8.0.0/src/common.rs000064400000000000000000000651371046102023000147350ustar 00000000000000use std::cmp::Ordering; use std::ffi::OsString; use std::fs::{DirEntry, File, OpenOptions}; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, AtomicUsize}; use std::sync::{atomic, Arc}; use std::thread::{sleep, JoinHandle}; use std::time::{Duration, SystemTime}; use std::{fs, thread}; use crossbeam_channel::Sender; use directories_next::ProjectDirs; use fun_time::fun_time; use handsome_logger::{ColorChoice, ConfigBuilder, TerminalMode}; use log::{debug, info, warn, LevelFilter, Record}; // #[cfg(feature = "heif")] // use libheif_rs::LibHeif; use crate::common_dir_traversal::{CheckingMethod, ToolType}; use crate::common_directory::Directories; use crate::common_items::{ExcludedItems, SingleExcludedItem}; use crate::common_messages::Messages; use crate::common_tool::DeleteMethod; use crate::common_traits::ResultEntry; use crate::duplicate::make_hard_link; use crate::progress_data::{CurrentStage, ProgressData}; use crate::CZKAWKA_VERSION; static NUMBER_OF_THREADS: state::InitCell = state::InitCell::new(); static ALL_AVAILABLE_THREADS: state::InitCell = state::InitCell::new(); pub const DEFAULT_THREAD_SIZE: usize = 8 * 1024 * 1024; // 8 MB pub const DEFAULT_WORKER_THREAD_SIZE: usize = 4 * 1024 * 1024; // 4 MB pub 
fn get_number_of_threads() -> usize {
    let data = NUMBER_OF_THREADS.get();
    if *data >= 1 {
        *data
    } else {
        get_all_available_threads()
    }
}

fn filtering_messages(record: &Record) -> bool {
    if let Some(module_path) = record.module_path() {
        module_path.starts_with("czkawka") || module_path.starts_with("krokiet")
    } else {
        true
    }
}

pub fn setup_logger(disabled_printing: bool) {
    let log_level = if disabled_printing { LevelFilter::Off } else { LevelFilter::Info };

    let config = ConfigBuilder::default().set_level(log_level).set_message_filtering(Some(filtering_messages)).build();
    handsome_logger::TermLogger::init(config, TerminalMode::Mixed, ColorChoice::Always).expect("Cannot initialize logger");
}

pub fn get_all_available_threads() -> usize {
    *ALL_AVAILABLE_THREADS.get_or_init(|| {
        let available_threads = thread::available_parallelism().map(std::num::NonZeroUsize::get).unwrap_or(1);
        ALL_AVAILABLE_THREADS.set(available_threads);
        available_threads
    })
}

#[allow(clippy::vec_init_then_push)]
pub fn print_version_mode() {
    let rust_version = env!("RUST_VERSION_INTERNAL");
    let debug_release = if cfg!(debug_assertions) { "debug" } else { "release" };

    let processors = get_all_available_threads();

    let info = os_info::get();

    #[allow(unused_mut)]
    let mut features: Vec<&str> = vec![];
    #[cfg(feature = "heif")]
    features.push("heif");
    #[cfg(feature = "libavif")]
    features.push("libavif");
    #[cfg(feature = "libraw")]
    features.push("libraw");

    info!(
        "App version: {CZKAWKA_VERSION}, {debug_release} mode, rust {rust_version}, os {} {} [{} {}], {processors} cpu/threads, features({}): [{}]",
        info.os_type(),
        info.version(),
        std::env::consts::ARCH,
        info.bitness(),
        features.len(),
        features.join(", ")
    );
    if cfg!(debug_assertions) {
        warn!("You are running the debug version of the app, which is a lot slower than the release version.");
    }

    if option_env!("USING_CRANELIFT").is_some() {
        warn!("You are running the app with cranelift, which is intended only for fast compilation, not runtime performance.");
    }
}

pub fn set_default_number_of_threads() {
    set_number_of_threads(get_all_available_threads());
}

pub fn set_number_of_threads(thread_number: usize) {
    NUMBER_OF_THREADS.set(thread_number);

    let additional_message = if thread_number == 0 {
        " (0 - means that all available threads will be used)"
    } else {
        ""
    };
    debug!("Number of threads set to {thread_number}{additional_message}");

    rayon::ThreadPoolBuilder::new()
        .num_threads(get_number_of_threads())
        .stack_size(DEFAULT_WORKER_THREAD_SIZE)
        .build_global()
        .expect("Cannot set number of threads");
}

pub const RAW_IMAGE_EXTENSIONS: &[&str] = &[
    "mrw", "arw", "srf", "sr2", "mef", "orf", "srw", "erf", "kdc", "dcs", "rw2", "raf", "dcr", "dng", "pef", "crw", "iiq", "3fr", "nrw", "nef", "mos", "cr2", "ari",
];
pub const JXL_IMAGE_EXTENSIONS: &[&str] = &["jxl"];

#[cfg(feature = "libavif")]
pub const IMAGE_RS_EXTENSIONS: &[&str] = &[
    "jpg", "jpeg", "png", "bmp", "tiff", "tif", "tga", "ff", "jif", "jfi", "webp", "gif", "ico", "exr", "qoi", "avif",
];
#[cfg(not(feature = "libavif"))]
pub const IMAGE_RS_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "bmp", "tiff", "tif", "tga", "ff", "jif", "jfi", "webp", "gif", "ico", "exr", "qoi"];

#[cfg(feature = "libavif")]
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "bmp", "webp", "exr", "qoi", "avif"];
#[cfg(not(feature = "libavif"))]
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "bmp", "webp", "exr", "qoi"];
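// A minimal usage sketch (hypothetical caller code, not part of this crate): the global
// rayon pool can only be configured once per process, because a second call to
// ThreadPoolBuilder::build_global() fails, so the thread count should be set once at startup:
//
//     czkawka_core::common::set_number_of_threads(4);
//     assert_eq!(czkawka_core::common::get_number_of_threads(), 4);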
#[cfg(feature = "libavif")] pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[ "jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "gif", "bmp", "ico", "jfif", "jpe", "pnz", "dib", "webp", "exr", "avif", ]; #[cfg(not(feature = "libavif"))] pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[ "jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "gif", "bmp", "ico", "jfif", "jpe", "pnz", "dib", "webp", "exr", ]; pub const HEIC_EXTENSIONS: &[&str] = &["heif", "heifs", "heic", "heics", "avci", "avcs"]; pub const ZIP_FILES_EXTENSIONS: &[&str] = &["zip", "jar"]; pub const PDF_FILES_EXTENSIONS: &[&str] = &["pdf"]; pub const AUDIO_FILES_EXTENSIONS: &[&str] = &[ "mp3", "flac", "wav", "ogg", "m4a", "aac", "aiff", "pcm", "aif", "aiff", "aifc", "m3a", "mp2", "mp4a", "mp2a", "mpga", "wave", "weba", "wma", "oga", ]; pub const VIDEO_FILES_EXTENSIONS: &[&str] = &[ "mp4", "mpv", "flv", "mp4a", "webm", "mpg", "mp2", "mpeg", "m4p", "m4v", "avi", "wmv", "qt", "mov", "swf", "mkv", ]; pub const LOOP_DURATION: u32 = 20; //ms pub const SEND_PROGRESS_DATA_TIME_BETWEEN: u32 = 200; //ms pub fn remove_folder_if_contains_only_empty_folders(path: impl AsRef, remove_to_trash: bool) -> Result<(), String> { let path = path.as_ref(); if !path.is_dir() { return Err(format!("Trying to remove folder \"{}\" which is not a directory", path.to_string_lossy())); } let mut entries_to_check = Vec::new(); let Ok(initial_entry) = path.read_dir() else { return Err(format!("Cannot read directory \"{}\"", path.to_string_lossy())); }; for entry in initial_entry { if let Ok(entry) = entry { entries_to_check.push(entry); } else { return Err(format!("Cannot read entry from directory \"{}\"", path.to_string_lossy())); } } loop { let Some(entry) = entries_to_check.pop() else { break; }; let Some(file_type) = entry.file_type().ok() else { return Err(format!( "Folder contains file with unknown type \"{}\" inside \"{}\"", entry.path().to_string_lossy(), path.to_string_lossy() )); }; if !file_type.is_dir() { return Err(format!("Folder contains file \"{}\" inside \"{}\"", entry.path().to_string_lossy(), path.to_string_lossy())); } let Ok(internal_read_dir) = entry.path().read_dir() else { return Err(format!( "Cannot read directory \"{}\" inside \"{}\"", entry.path().to_string_lossy(), path.to_string_lossy() )); }; for internal_elements in internal_read_dir { if let Ok(internal_element) = internal_elements { entries_to_check.push(internal_element); } else { return Err(format!( "Cannot read entry from directory \"{}\" inside \"{}\"", entry.path().to_string_lossy(), path.to_string_lossy() )); } } } if remove_to_trash { trash::delete(path).map_err(|e| format!("Cannot move folder \"{}\" to trash, reason {e}", path.to_string_lossy())) } else { fs::remove_dir_all(path).map_err(|e| format!("Cannot remove directory \"{}\", reason {e}", path.to_string_lossy())) } } pub fn open_cache_folder(cache_file_name: &str, save_to_cache: bool, use_json: bool, warnings: &mut Vec) -> Option<((Option, PathBuf), (Option, PathBuf))> { if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") { let cache_dir = PathBuf::from(proj_dirs.cache_dir()); let cache_file = cache_dir.join(cache_file_name); let cache_file_json = cache_dir.join(cache_file_name.replace(".bin", ".json")); let mut file_handler_default = None; let mut file_handler_json = None; if save_to_cache { if cache_dir.exists() { if !cache_dir.is_dir() { warnings.push(format!("Config dir \"{}\" is a file!", cache_dir.to_string_lossy())); return None; } } else if let Err(e) 
= fs::create_dir_all(&cache_dir) { warnings.push(format!("Cannot create config dir \"{}\", reason {e}", cache_dir.to_string_lossy())); return None; } file_handler_default = Some(match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) { Ok(t) => t, Err(e) => { warnings.push(format!("Cannot create or open cache file \"{}\", reason {e}", cache_file.to_string_lossy())); return None; } }); if use_json { file_handler_json = Some(match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file_json) { Ok(t) => t, Err(e) => { warnings.push(format!("Cannot create or open cache file \"{}\", reason {e}", cache_file_json.to_string_lossy())); return None; } }); } } else { if let Ok(t) = OpenOptions::new().read(true).open(&cache_file) { file_handler_default = Some(t); } else { if use_json { file_handler_json = Some(OpenOptions::new().read(true).open(&cache_file_json).ok()?); } else { // messages.push(format!("Cannot find or open cache file {cache_file:?}")); // No error or warning return None; } } }; return Some(((file_handler_default, cache_file), (file_handler_json, cache_file_json))); } None } pub fn split_path(path: &Path) -> (String, String) { match (path.parent(), path.file_name()) { (Some(dir), Some(file)) => (dir.to_string_lossy().to_string(), file.to_string_lossy().into_owned()), (Some(dir), None) => (dir.to_string_lossy().to_string(), String::new()), (None, _) => (String::new(), String::new()), } } pub fn split_path_compare(path_a: &Path, path_b: &Path) -> Ordering { match path_a.parent().cmp(&path_b.parent()) { Ordering::Equal => path_a.file_name().cmp(&path_b.file_name()), other => other, } } pub fn create_crash_message(library_name: &str, file_path: &str, home_library_url: &str) -> String { format!("{library_name} library crashed when opening \"{file_path}\", please check if this is fixed with the latest version of {library_name} and if it is not fixed, please report bug here - {home_library_url}") } pub fn regex_check(expression_item: &SingleExcludedItem, directory_name: &str) -> bool { if expression_item.expression_splits.is_empty() { return true; } // Early checking if directory contains all parts needed by expression for split in &expression_item.unique_extensions_splits { if !directory_name.contains(split) { return false; } } // `git*` shouldn't be true for `/gitsfafasfs` if !expression_item.expression.starts_with('*') && directory_name .find(&expression_item.expression_splits[0]) .expect("Cannot fail, because split must exists in directory_name") > 0 { return false; } // `*home` shouldn't be true for `/homeowner` if !expression_item.expression.ends_with('*') && !directory_name.ends_with(expression_item.expression_splits.last().expect("Cannot fail, because at least one item is available")) { return false; } // At the end we check if parts between * are correctly positioned let mut last_split_point = directory_name.find(&expression_item.expression_splits[0]).expect("Cannot fail, because is checked earlier"); let mut current_index: usize = 0; let mut found_index: usize; for spl in &expression_item.expression_splits[1..] 
{
            found_index = match directory_name[current_index..].find(spl) {
                Some(t) => t,
                None => return false,
            };
            current_index = last_split_point + spl.len();
            last_split_point = found_index + current_index;
        }
        true
    }

pub fn normalize_windows_path(path_to_change: impl AsRef<Path>) -> PathBuf {
    let path = path_to_change.as_ref();

    // Don't do anything, because network paths may be case-sensitive
    if path.to_string_lossy().starts_with('\\') {
        return path.to_path_buf();
    }

    match path.to_str() {
        Some(path) if path.is_char_boundary(1) => {
            let replaced = path.replace('/', "\\");
            let mut new_path = OsString::new();
            if replaced[1..].starts_with(':') {
                new_path.push(replaced[..1].to_ascii_uppercase());
                new_path.push(replaced[1..].to_ascii_lowercase());
            } else {
                new_path.push(replaced.to_ascii_lowercase());
            }
            PathBuf::from(new_path)
        }
        _ => path.to_path_buf(),
    }
}

pub fn check_folder_children(
    dir_result: &mut Vec<PathBuf>,
    warnings: &mut Vec<String>,
    entry_data: &DirEntry,
    recursive_search: bool,
    directories: &Directories,
    excluded_items: &ExcludedItems,
) {
    if !recursive_search {
        return;
    }
    let next_item = entry_data.path();
    if directories.is_excluded(&next_item) {
        return;
    }

    if excluded_items.is_excluded(&next_item) {
        return;
    }

    #[cfg(target_family = "unix")]
    if directories.exclude_other_filesystems() {
        match directories.is_on_other_filesystems(&next_item) {
            Ok(true) => return,
            Err(e) => warnings.push(e),
            _ => (),
        }
    }

    dir_result.push(next_item);
}

// Here we assume that each internal Vec has at least 1 object
#[allow(clippy::ptr_arg)]
pub fn delete_files_custom<T>(items: &Vec<&Vec<T>>, delete_method: &DeleteMethod, text_messages: &mut Messages, dry_run: bool) -> (u64, usize, usize)
where
    T: ResultEntry + Clone,
{
    let res = items
        .iter()
        .map(|values| {
            let mut gained_space: u64 = 0;
            let mut removed_files: usize = 0;
            let mut failed_to_remove_files: usize = 0;
            let mut infos = Vec::new();
            let mut errors = Vec::new();

            let mut all_values = (*values).clone();
            let len = all_values.len();

            // Sorted from smallest to biggest or oldest to newest
            all_values.sort_unstable_by_key(match delete_method {
                DeleteMethod::AllExceptBiggest | DeleteMethod::AllExceptSmallest | DeleteMethod::OneBiggest | DeleteMethod::OneSmallest => ResultEntry::get_size,
                _ => ResultEntry::get_modified_date,
            });

            if delete_method == &DeleteMethod::HardLink {
                let original_file = &all_values[0];
                for file_entry in &all_values[1..] {
                    if dry_run {
                        infos.push(format!(
                            "dry_run - would create hardlink from \"{}\" to \"{}\"",
                            original_file.get_path().to_string_lossy(),
                            file_entry.get_path().to_string_lossy()
                        ));
                    } else if let Err(e) = make_hard_link(original_file.get_path(), file_entry.get_path()) {
                        errors.push(format!(
                            "Cannot create hard link from \"{}\" to \"{}\" - {e}",
                            file_entry.get_path().to_string_lossy(),
                            original_file.get_path().to_string_lossy()
                        ));
                        failed_to_remove_files += 1;
                    } else {
                        gained_space += file_entry.get_size();
                        removed_files += 1;
                    }
                }

                return (infos, errors, gained_space, removed_files, failed_to_remove_files);
            }

            let items = match delete_method {
                DeleteMethod::Delete => &all_values,
                DeleteMethod::AllExceptNewest | DeleteMethod::AllExceptBiggest => &all_values[..(len - 1)],
                DeleteMethod::AllExceptOldest | DeleteMethod::AllExceptSmallest => &all_values[1..],
                DeleteMethod::OneOldest | DeleteMethod::OneSmallest => &all_values[..1],
                DeleteMethod::OneNewest | DeleteMethod::OneBiggest => &all_values[(len - 1)..],
                DeleteMethod::HardLink | DeleteMethod::None => unreachable!("HardLink and None should be handled before"),
            };

            for i in items {
                if dry_run {
                    infos.push(format!("dry_run - would delete file: \"{}\"", i.get_path().to_string_lossy()));
                } else if let Err(e) = fs::remove_file(i.get_path()) {
                    errors.push(format!("Cannot delete file: \"{}\" - {e}", i.get_path().to_string_lossy()));
                    failed_to_remove_files += 1;
                } else {
                    removed_files += 1;
                    gained_space += i.get_size();
                }
            }

            (infos, errors, gained_space, removed_files, failed_to_remove_files)
        })
        .collect::<Vec<_>>();

    let mut gained_space = 0;
    let mut removed_files = 0;
    let mut failed_to_remove_files = 0;
    for (infos, errors, gained_space_v, removed_files_v, failed_to_remove_files_v) in res {
        text_messages.messages.extend(infos);
        text_messages.errors.extend(errors);
        gained_space += gained_space_v;
        removed_files += removed_files_v;
        failed_to_remove_files += failed_to_remove_files_v;
    }

    (gained_space, removed_files, failed_to_remove_files)
}

pub fn filter_reference_folders_generic<T>(entries_to_check: Vec<Vec<T>>, directories: &Directories) -> Vec<(T, Vec<T>)>
where
    T: ResultEntry,
{
    entries_to_check
        .into_iter()
        .filter_map(|vec_file_entry| {
            let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
                vec_file_entry.into_iter().partition(|e| directories.is_in_referenced_directory(e.get_path()));

            if normal_files.is_empty() {
                None
            } else {
                files_from_referenced_folders.pop().map(|file| (file, normal_files))
            }
        })
        .collect::<Vec<(T, Vec<T>)>>()
}

pub fn prepare_thread_handler_common(
    progress_sender: Option<&Sender<ProgressData>>,
    sstage: CurrentStage,
    max_value: usize,
    test_type: (ToolType, CheckingMethod),
) -> (JoinHandle<()>, Arc<AtomicBool>, Arc<AtomicUsize>, AtomicBool) {
    let (tool_type, checking_method) = test_type;
    assert_ne!(tool_type, ToolType::None, "ToolType::None should not exist");

    let progress_thread_run = Arc::new(AtomicBool::new(true));
    let atomic_counter = Arc::new(AtomicUsize::new(0));
    let check_was_stopped = AtomicBool::new(false);
    let progress_thread_sender = if let Some(progress_sender) = progress_sender {
        let progress_send = progress_sender.clone();
        let progress_thread_run = progress_thread_run.clone();
        let atomic_counter = atomic_counter.clone();
        thread::spawn(move || {
            // Use an earlier time, so that the first message is sent immediately
            let mut time_since_last_send = SystemTime::now() - Duration::from_secs(10u64);

            loop {
                if
time_since_last_send.elapsed().expect("Cannot count time backwards").as_millis() > SEND_PROGRESS_DATA_TIME_BETWEEN as u128 { let progress_data = ProgressData { sstage, checking_method, current_stage_idx: sstage.get_current_stage(), max_stage_idx: tool_type.get_max_stage(checking_method), entries_checked: atomic_counter.load(atomic::Ordering::Relaxed), entries_to_check: max_value, tool_type, }; progress_data.validate(); progress_send.send(progress_data).expect("Cannot send progress data"); time_since_last_send = SystemTime::now(); } if !progress_thread_run.load(atomic::Ordering::Relaxed) { break; } sleep(Duration::from_millis(LOOP_DURATION as u64)); } }) } else { thread::spawn(|| {}) }; (progress_thread_sender, progress_thread_run, atomic_counter, check_was_stopped) } #[inline] pub fn check_if_stop_received(stop_receiver: Option<&crossbeam_channel::Receiver<()>>) -> bool { if let Some(stop_receiver) = stop_receiver { if stop_receiver.try_recv().is_ok() { return true; } } false } #[fun_time(message = "send_info_and_wait_for_ending_all_threads", level = "debug")] pub fn send_info_and_wait_for_ending_all_threads(progress_thread_run: &Arc, progress_thread_handle: JoinHandle<()>) { progress_thread_run.store(false, atomic::Ordering::Relaxed); progress_thread_handle.join().expect("Cannot join progress thread - quite fatal error, but happens rarely"); } #[cfg(test)] mod test { use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; use tempfile::tempdir; use crate::common::{normalize_windows_path, regex_check, remove_folder_if_contains_only_empty_folders}; use crate::common_items::new_excluded_item; #[test] fn test_remove_folder_if_contains_only_empty_folders() { let dir = tempdir().expect("Cannot create temporary directory"); let sub_dir = dir.path().join("sub_dir"); fs::create_dir(&sub_dir).expect("Cannot create directory"); // Test with empty directory assert!(remove_folder_if_contains_only_empty_folders(&sub_dir, false).is_ok()); assert!(!Path::new(&sub_dir).exists()); // Test with directory containing an empty directory fs::create_dir(&sub_dir).expect("Cannot create directory"); fs::create_dir(sub_dir.join("empty_sub_dir")).expect("Cannot create directory"); assert!(remove_folder_if_contains_only_empty_folders(&sub_dir, false).is_ok()); assert!(!Path::new(&sub_dir).exists()); // Test with directory containing a file fs::create_dir(&sub_dir).expect("Cannot create directory"); let mut file = fs::File::create(sub_dir.join("file.txt")).expect("Cannot create file"); writeln!(file, "Hello, world!").expect("Cannot write to file"); assert!(remove_folder_if_contains_only_empty_folders(&sub_dir, false).is_err()); assert!(Path::new(&sub_dir).exists()); } #[test] fn test_regex() { assert!(regex_check(&new_excluded_item("*"), "/home/rafal")); assert!(regex_check(&new_excluded_item("*home*"), "/home/rafal")); assert!(regex_check(&new_excluded_item("*home"), "/home")); assert!(regex_check(&new_excluded_item("*home/"), "/home/")); assert!(regex_check(&new_excluded_item("*home/*"), "/home/")); assert!(regex_check(&new_excluded_item("*.git*"), "/home/.git")); assert!(regex_check(&new_excluded_item("*/home/rafal*rafal*rafal*rafal*"), "/home/rafal/rafalrafalrafal")); assert!(regex_check(&new_excluded_item("AAA"), "AAA")); assert!(regex_check(&new_excluded_item("AAA*"), "AAABDGG/QQPW*")); assert!(!regex_check(&new_excluded_item("*home"), "/home/")); assert!(!regex_check(&new_excluded_item("*home"), "/homefasfasfasfasf/")); assert!(!regex_check(&new_excluded_item("*home"), "/homefasfasfasfasf")); 
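        // The negative cases around here document two properties of the matcher
        // (inferred from the asserts themselves): every literal segment between '*'
        // wildcards must occur in order, and a pattern without a leading/trailing '*'
        // is anchored to the start/end of the checked path - e.g. "rafal*a" cannot
        // match plain "rafal".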
assert!(!regex_check(&new_excluded_item("rafal*afal*fal"), "rafal")); assert!(!regex_check(&new_excluded_item("rafal*a"), "rafal")); assert!(!regex_check(&new_excluded_item("AAAAAAAA****"), "/AAAAAAAAAAAAAAAAA")); assert!(!regex_check(&new_excluded_item("*.git/*"), "/home/.git")); assert!(!regex_check(&new_excluded_item("*home/*koc"), "/koc/home/")); assert!(!regex_check(&new_excluded_item("*home/"), "/home")); assert!(!regex_check(&new_excluded_item("*TTT"), "/GGG")); assert!(regex_check( &new_excluded_item("*/home/*/.local/share/containers"), "/var/home/roman/.local/share/containers" )); if cfg!(target_family = "windows") { assert!(regex_check(&new_excluded_item("*\\home"), "C:\\home")); assert!(regex_check(&new_excluded_item("*/home"), "C:\\home")); } } #[test] fn test_windows_path() { assert_eq!(PathBuf::from("C:\\path.txt"), normalize_windows_path("c:/PATH.tXt")); assert_eq!(PathBuf::from("H:\\reka\\weza\\roman.txt"), normalize_windows_path("h:/RekA/Weza\\roMan.Txt")); assert_eq!(PathBuf::from("T:\\a"), normalize_windows_path("T:\\A")); assert_eq!(PathBuf::from("\\\\aBBa"), normalize_windows_path("\\\\aBBa")); assert_eq!(PathBuf::from("a"), normalize_windows_path("a")); assert_eq!(PathBuf::from(""), normalize_windows_path("")); } } czkawka_core-8.0.0/src/common_cache.rs000064400000000000000000000266151046102023000160560ustar 00000000000000use std::collections::BTreeMap; use std::io::{BufReader, BufWriter}; use fun_time::fun_time; use image::imageops::FilterType; use image_hasher::HashAlg; use log::{debug, error}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use crate::common; use crate::common_messages::Messages; use crate::common_traits::ResultEntry; use crate::duplicate::HashType; use crate::similar_images::{convert_algorithm_to_string, convert_filters_to_string}; const CACHE_VERSION: &str = "70"; const CACHE_IMAGE_VERSION: &str = "80"; pub fn get_broken_files_cache_file() -> String { format!("cache_broken_files_{CACHE_VERSION}.bin") } pub fn get_similar_images_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String { format!( "cache_similar_images_{hash_size}_{}_{}_{CACHE_IMAGE_VERSION}.bin", convert_algorithm_to_string(hash_alg), convert_filters_to_string(image_filter), ) } pub fn get_similar_videos_cache_file() -> String { format!("cache_similar_videos_{CACHE_VERSION}.bin") } pub fn get_similar_music_cache_file(checking_tags: bool) -> String { if checking_tags { format!("cache_same_music_tags_{CACHE_VERSION}.bin") } else { format!("cache_same_music_fingerprints_{CACHE_VERSION}.bin") } } pub fn get_duplicate_cache_file(type_of_hash: &HashType, is_prehash: bool) -> String { let prehash_str = if is_prehash { "_prehash" } else { "" }; format!("cache_duplicates_{type_of_hash:?}{prehash_str}_{CACHE_VERSION}.bin") } #[fun_time(message = "save_cache_to_file_generalized", level = "debug")] pub fn save_cache_to_file_generalized(cache_file_name: &str, hashmap: &BTreeMap, save_also_as_json: bool, minimum_file_size: u64) -> Messages where T: Serialize + ResultEntry + Sized + Send + Sync, { let mut text_messages = Messages::new(); if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = common::open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings) { let hashmap_to_save = hashmap.values().filter(|t| t.get_size() >= minimum_file_size).collect::>(); { let writer = BufWriter::new(file_handler.expect("Cannot fail, because for saving, this always exists")); if 
let Err(e) = bincode::serialize_into(writer, &hashmap_to_save) { text_messages.warnings.push(format!("Cannot write data to cache file {cache_file:?}, reason {e}")); debug!("Failed to save cache to file {cache_file:?}"); return text_messages; } debug!("Saved binary to file {cache_file:?}"); } if save_also_as_json { if let Some(file_handler_json) = file_handler_json { let writer = BufWriter::new(file_handler_json); if let Err(e) = serde_json::to_writer(writer, &hashmap_to_save) { text_messages.warnings.push(format!("Cannot write data to cache file {cache_file_json:?}, reason {e}")); debug!("Failed to save cache to file {cache_file_json:?}"); return text_messages; } debug!("Saved json to file {cache_file_json:?}"); } } text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len())); debug!("Properly saved to file {} cache entries.", hashmap.len()); } else { debug!("Failed to save cache to file {cache_file_name} because not exists"); } text_messages } pub fn extract_loaded_cache( loaded_hash_map: &BTreeMap, files_to_check: BTreeMap, records_already_cached: &mut BTreeMap, non_cached_files_to_check: &mut BTreeMap, ) where T: Clone, { for (name, file_entry) in files_to_check { if let Some(cached_file_entry) = loaded_hash_map.get(&name) { records_already_cached.insert(name, cached_file_entry.clone()); } else { non_cached_files_to_check.insert(name, file_entry); } } } #[fun_time(message = "load_cache_from_file_generalized_by_path", level = "debug")] pub fn load_cache_from_file_generalized_by_path(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap) -> (Messages, Option>) where for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone, { let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, used_files); let Some(vec_loaded_entries) = vec_loaded_cache else { return (text_messages, None); }; debug!("Converting cache Vec into BTreeMap"); let map_loaded_entries: BTreeMap = vec_loaded_entries .into_iter() .map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry)) .collect(); debug!("Converted cache Vec into BTreeMap"); (text_messages, Some(map_loaded_entries)) } #[fun_time(message = "load_cache_from_file_generalized_by_size", level = "debug")] pub fn load_cache_from_file_generalized_by_size( cache_file_name: &str, delete_outdated_cache: bool, cache_not_converted: &BTreeMap>, ) -> (Messages, Option>>) where for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone, { debug!("Converting cache BtreeMap> into BTreeMap"); let mut used_files: BTreeMap = Default::default(); for file_entry in cache_not_converted.values().flatten() { used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone()); } debug!("Converted cache BtreeMap> into BTreeMap"); let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files); let Some(vec_loaded_entries) = vec_loaded_cache else { return (text_messages, None); }; debug!("Converting cache Vec into BTreeMap>"); let mut map_loaded_entries: BTreeMap> = Default::default(); for file_entry in vec_loaded_entries { map_loaded_entries.entry(file_entry.get_size()).or_default().push(file_entry); } debug!("Converted cache Vec into BTreeMap>"); (text_messages, Some(map_loaded_entries)) } #[fun_time(message = "load_cache_from_file_generalized_by_path_from_size", level = "debug")] pub fn load_cache_from_file_generalized_by_path_from_size( 
    cache_file_name: &str,
    delete_outdated_cache: bool,
    cache_not_converted: &BTreeMap<u64, Vec<T>>,
) -> (Messages, Option<BTreeMap<String, T>>)
where
    for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
{
    debug!("Converting cache BTreeMap<u64, Vec<T>> into BTreeMap<String, T>");
    let mut used_files: BTreeMap<String, T> = Default::default();
    for file_entry in cache_not_converted.values().flatten() {
        used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone());
    }
    debug!("Converted cache BTreeMap<u64, Vec<T>> into BTreeMap<String, T>");

    let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files);
    let Some(vec_loaded_entries) = vec_loaded_cache else {
        return (text_messages, None);
    };

    debug!("Converting cache Vec<T> into BTreeMap<String, T>");
    let map_loaded_entries: BTreeMap<String, T> = vec_loaded_entries
        .into_iter()
        .map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
        .collect();
    debug!("Converted cache Vec<T> into BTreeMap<String, T>");
    (text_messages, Some(map_loaded_entries))
}

#[fun_time(message = "load_cache_from_file_generalized", level = "debug")]
fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<Vec<T>>)
where
    for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
{
    let mut text_messages = Messages::new();

    if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
        common::open_cache_folder(cache_file_name, false, true, &mut text_messages.warnings)
    {
        let mut vec_loaded_entries: Vec<T>;
        if let Some(file_handler) = file_handler {
            let reader = BufReader::new(file_handler);
            // TODO: limits cannot be used here - the save function would probably also need
            // to be updated, because without that change loading does not work:
            // let options = bincode::DefaultOptions::new().with_limit(4 * 1024 * 1024 * 1024);
            // vec_loaded_entries = match options.deserialize_from(reader) {
            vec_loaded_entries = match bincode::deserialize_from(reader) {
                Ok(t) => t,
                Err(e) => {
                    text_messages.warnings.push(format!("Failed to load data from cache file {cache_file:?}, reason {e}"));
                    error!("Failed to load cache from file {cache_file:?}");
                    return (text_messages, None);
                }
            };
        } else {
            let reader = BufReader::new(file_handler_json.expect("This cannot fail, because if file_handler is None, then this cannot be None"));
            vec_loaded_entries = match serde_json::from_reader(reader) {
                Ok(t) => t,
                Err(e) => {
                    text_messages
                        .warnings
                        .push(format!("Failed to load data from json cache file {cache_file_json:?}, reason {e}"));
                    debug!("Failed to load cache from file {cache_file:?}");
                    return (text_messages, None);
                }
            };
        }

        debug!(
            "Starting removal of outdated cache entries (removing non-existent files from cache - {})",
            delete_outdated_cache
        );
        let initial_number_of_entries = vec_loaded_entries.len();
        vec_loaded_entries = vec_loaded_entries
            .into_par_iter()
            .filter(|file_entry| {
                let path = file_entry.get_path();
                let file_entry_path_str = path.to_string_lossy().to_string();
                if let Some(used_file) = used_files.get(&file_entry_path_str) {
                    if file_entry.get_size() != used_file.get_size() {
                        return false;
                    }
                    if file_entry.get_modified_date() != used_file.get_modified_date() {
                        return false;
                    }
                }
                if delete_outdated_cache && !path.exists() {
                    return false;
                }
                true
            })
            .collect();
        debug!(
            "Completed removing outdated cache entries, removed {} out of all {} entries",
            initial_number_of_entries - vec_loaded_entries.len(),
            initial_number_of_entries
        );

        text_messages.messages.push(format!("Properly loaded {} cache entries.", vec_loaded_entries.len()));
        debug!("Loaded
cache from file {cache_file_name} (or json alternative) - {} results", vec_loaded_entries.len()); return (text_messages, Some(vec_loaded_entries)); } debug!("Failed to load cache from file {cache_file_name} because not exists"); (text_messages, None) } czkawka_core-8.0.0/src/common_dir_traversal.rs000064400000000000000000000643061046102023000176530ustar 00000000000000use std::collections::BTreeMap; use std::fmt::Display; use std::fs; use std::fs::{DirEntry, FileType, Metadata}; #[cfg(target_family = "unix")] use std::os::unix::fs::MetadataExt; use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; use std::time::UNIX_EPOCH; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use log::debug; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use crate::common::{check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; use crate::common_directory::Directories; use crate::common_extensions::Extensions; use crate::common_items::ExcludedItems; use crate::common_tool::CommonToolData; use crate::common_traits::ResultEntry; use crate::flc; use crate::progress_data::{CurrentStage, ProgressData}; #[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] pub enum ToolType { Duplicate, EmptyFolders, EmptyFiles, InvalidSymlinks, BrokenFiles, BadExtensions, BigFile, SameMusic, SimilarImages, SimilarVideos, TemporaryFiles, #[default] None, } #[derive(PartialEq, Eq, Clone, Debug, Copy, Default, Deserialize, Serialize)] pub enum CheckingMethod { #[default] None, Name, SizeName, Size, Hash, AudioTags, AudioContent, } #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct FileEntry { pub path: PathBuf, pub size: u64, pub modified_date: u64, } impl ResultEntry for FileEntry { fn get_path(&self) -> &Path { &self.path } fn get_modified_date(&self) -> u64 { self.modified_date } fn get_size(&self) -> u64 { self.size } } // Symlinks #[derive(Clone, Debug, PartialEq, Eq, Copy, Deserialize, Serialize)] pub enum ErrorType { InfiniteRecursion, NonExistentFile, } impl Display for ErrorType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ErrorType::InfiniteRecursion => write!(f, "Infinite recursion"), ErrorType::NonExistentFile => write!(f, "Non existent file"), } } } #[derive(Copy, Clone, Eq, PartialEq)] pub enum Collect { InvalidSymlinks, Files, } #[derive(Eq, PartialEq, Copy, Clone, Debug)] enum EntryType { File, Dir, Symlink, Other, } pub struct DirTraversalBuilder<'a, 'b, F> { group_by: Option, root_dirs: Vec, stop_receiver: Option<&'a Receiver<()>>, progress_sender: Option<&'b Sender>, minimal_file_size: Option, maximal_file_size: Option, checking_method: CheckingMethod, collect: Collect, recursive_search: bool, directories: Option, excluded_items: Option, extensions: Option, tool_type: ToolType, } pub struct DirTraversal<'a, 'b, F> { group_by: F, root_dirs: Vec, stop_receiver: Option<&'a Receiver<()>>, progress_sender: Option<&'b Sender>, recursive_search: bool, directories: Directories, excluded_items: ExcludedItems, extensions: Extensions, minimal_file_size: u64, maximal_file_size: u64, checking_method: CheckingMethod, tool_type: ToolType, collect: Collect, } impl<'a, 'b> Default for DirTraversalBuilder<'a, 'b, ()> { fn default() -> Self { Self::new() } } impl<'a, 'b> DirTraversalBuilder<'a, 'b, ()> { pub fn new() -> DirTraversalBuilder<'a, 'b, ()> { DirTraversalBuilder { group_by: None, root_dirs: vec![], stop_receiver: None, progress_sender: None, checking_method: 
CheckingMethod::None, minimal_file_size: None, maximal_file_size: None, collect: Collect::Files, recursive_search: false, directories: None, extensions: None, excluded_items: None, tool_type: ToolType::None, } } } impl<'a, 'b, F> DirTraversalBuilder<'a, 'b, F> { pub fn root_dirs(mut self, dirs: Vec) -> Self { self.root_dirs = dirs; self } pub fn common_data(mut self, common_tool_data: &CommonToolData) -> Self { self.root_dirs = common_tool_data.directories.included_directories.clone(); self.extensions = Some(common_tool_data.extensions.clone()); self.excluded_items = Some(common_tool_data.excluded_items.clone()); self.recursive_search = common_tool_data.recursive_search; self.minimal_file_size = Some(common_tool_data.minimal_file_size); self.maximal_file_size = Some(common_tool_data.maximal_file_size); self.tool_type = common_tool_data.tool_type; self.directories = Some(common_tool_data.directories.clone()); self } pub fn stop_receiver(mut self, stop_receiver: Option<&'a Receiver<()>>) -> Self { self.stop_receiver = stop_receiver; self } pub fn progress_sender(mut self, progress_sender: Option<&'b Sender>) -> Self { self.progress_sender = progress_sender; self } pub fn checking_method(mut self, checking_method: CheckingMethod) -> Self { self.checking_method = checking_method; self } pub fn minimal_file_size(mut self, minimal_file_size: u64) -> Self { self.minimal_file_size = Some(minimal_file_size); self } pub fn maximal_file_size(mut self, maximal_file_size: u64) -> Self { self.maximal_file_size = Some(maximal_file_size); self } pub fn collect(mut self, collect: Collect) -> Self { self.collect = collect; self } pub fn directories(mut self, directories: Directories) -> Self { self.directories = Some(directories); self } pub fn extensions(mut self, extensions: Extensions) -> Self { self.extensions = Some(extensions); self } pub fn excluded_items(mut self, excluded_items: ExcludedItems) -> Self { self.excluded_items = Some(excluded_items); self } pub fn recursive_search(mut self, recursive_search: bool) -> Self { self.recursive_search = recursive_search; self } pub fn tool_type(mut self, tool_type: ToolType) -> Self { self.tool_type = tool_type; self } #[cfg(target_family = "unix")] pub fn exclude_other_filesystems(mut self, exclude_other_filesystems: bool) -> Self { match self.directories { Some(ref mut directories) => directories.set_exclude_other_filesystems(exclude_other_filesystems), None => panic!("Directories is None"), } self } pub fn group_by(self, group_by: G) -> DirTraversalBuilder<'a, 'b, G> where G: Fn(&FileEntry) -> T, { DirTraversalBuilder { group_by: Some(group_by), root_dirs: self.root_dirs, stop_receiver: self.stop_receiver, progress_sender: self.progress_sender, directories: self.directories, extensions: self.extensions, excluded_items: self.excluded_items, recursive_search: self.recursive_search, maximal_file_size: self.maximal_file_size, minimal_file_size: self.minimal_file_size, collect: self.collect, checking_method: self.checking_method, tool_type: self.tool_type, } } pub fn build(self) -> DirTraversal<'a, 'b, F> { DirTraversal { group_by: self.group_by.expect("could not build"), root_dirs: self.root_dirs, stop_receiver: self.stop_receiver, progress_sender: self.progress_sender, checking_method: self.checking_method, minimal_file_size: self.minimal_file_size.unwrap_or(0), maximal_file_size: self.maximal_file_size.unwrap_or(u64::MAX), collect: self.collect, directories: self.directories.expect("could not build"), excluded_items: self.excluded_items.expect("could not 
build"), extensions: self.extensions.unwrap_or_default(), recursive_search: self.recursive_search, tool_type: self.tool_type, } } } pub enum DirTraversalResult { SuccessFiles { warnings: Vec, grouped_file_entries: BTreeMap>, }, Stopped, } fn entry_type(file_type: FileType) -> EntryType { if file_type.is_dir() { EntryType::Dir } else if file_type.is_symlink() { EntryType::Symlink } else if file_type.is_file() { EntryType::File } else { EntryType::Other } } impl<'a, 'b, F, T> DirTraversal<'a, 'b, F> where F: Fn(&FileEntry) -> T, T: Ord + PartialOrd, { #[fun_time(message = "run(collecting files/dirs)", level = "debug")] pub fn run(self) -> DirTraversalResult { assert_ne!(self.tool_type, ToolType::None, "Tool type cannot be None"); let mut all_warnings = vec![]; let mut grouped_file_entries: BTreeMap> = BTreeMap::new(); // Add root folders for finding let mut folders_to_check: Vec = self.root_dirs.clone(); let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = prepare_thread_handler_common(self.progress_sender, CurrentStage::CollectingFiles, 0, (self.tool_type, self.checking_method)); let DirTraversal { collect, directories, excluded_items, extensions, recursive_search, minimal_file_size, maximal_file_size, stop_receiver, .. } = self; while !folders_to_check.is_empty() { if check_if_stop_received(stop_receiver) { send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); return DirTraversalResult::Stopped; } let segments: Vec<_> = folders_to_check .into_par_iter() .map(|current_folder| { let mut dir_result = vec![]; let mut warnings = vec![]; let mut fe_result = vec![]; let Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else { return (dir_result, warnings, fe_result); }; let mut counter = 0; // Check every sub folder/file/link etc. 
                    for entry in read_dir {
                        let Some(entry_data) = common_get_entry_data(&entry, &mut warnings, &current_folder) else {
                            continue;
                        };
                        let Ok(file_type) = entry_data.file_type() else { continue };

                        match (entry_type(file_type), collect) {
                            (EntryType::Dir, Collect::Files | Collect::InvalidSymlinks) => {
                                process_dir_in_file_symlink_mode(recursive_search, entry_data, &directories, &mut dir_result, &mut warnings, &excluded_items);
                            }
                            (EntryType::File, Collect::Files) => {
                                counter += 1;
                                process_file_in_file_mode(
                                    entry_data,
                                    &mut warnings,
                                    &mut fe_result,
                                    &extensions,
                                    &directories,
                                    &excluded_items,
                                    minimal_file_size,
                                    maximal_file_size,
                                );
                            }
                            (EntryType::File, Collect::InvalidSymlinks) => {
                                counter += 1;
                            }
                            (EntryType::Symlink, Collect::InvalidSymlinks) => {
                                counter += 1;
                                process_symlink_in_symlink_mode(entry_data, &mut warnings, &mut fe_result, &extensions, &directories, &excluded_items);
                            }
                            (EntryType::Symlink, Collect::Files) | (EntryType::Other, _) => {
                                // nothing to do
                            }
                        }
                    }
                    if counter > 0 {
                        // Increase the counter in one batch, because bumping the atomic value
                        // separately for every entry may be slow
                        atomic_counter.fetch_add(counter, Ordering::Relaxed);
                    }
                    (dir_result, warnings, fe_result)
                })
                .collect();

            let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
            folders_to_check = Vec::with_capacity(required_size);

            // Process collected data
            for (segment, warnings, fe_result) in segments {
                folders_to_check.extend(segment);
                all_warnings.extend(warnings);
                for fe in fe_result {
                    let key = (self.group_by)(&fe);
                    grouped_file_entries.entry(key).or_default().push(fe);
                }
            }
        }

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        debug!("Collected {} files", grouped_file_entries.values().map(Vec::len).sum::<usize>());

        match collect {
            Collect::Files | Collect::InvalidSymlinks => DirTraversalResult::SuccessFiles {
                grouped_file_entries,
                warnings: all_warnings,
            },
        }
    }
}

fn process_file_in_file_mode(
    entry_data: &DirEntry,
    warnings: &mut Vec<String>,
    fe_result: &mut Vec<FileEntry>,
    extensions: &Extensions,
    directories: &Directories,
    excluded_items: &ExcludedItems,
    minimal_file_size: u64,
    maximal_file_size: u64,
) {
    if !extensions.check_if_entry_have_valid_extension(entry_data) {
        return;
    }

    let current_file_name = entry_data.path();
    if excluded_items.is_excluded(&current_file_name) {
        return;
    }

    #[cfg(target_family = "unix")]
    if directories.exclude_other_filesystems() {
        match directories.is_on_other_filesystems(&current_file_name) {
            Ok(true) => return,
            Err(e) => warnings.push(e),
            _ => (),
        }
    }

    let Some(metadata) = common_get_metadata_dir(entry_data, warnings, &current_file_name) else {
        return;
    };

    if (minimal_file_size..=maximal_file_size).contains(&metadata.len()) {
        // Creating new file entry
        let fe: FileEntry = FileEntry {
            size: metadata.len(),
            modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
            path: current_file_name,
        };

        fe_result.push(fe);
    }
}

fn process_dir_in_file_symlink_mode(
    recursive_search: bool,
    entry_data: &DirEntry,
    directories: &Directories,
    dir_result: &mut Vec<PathBuf>,
    warnings: &mut Vec<String>,
    excluded_items: &ExcludedItems,
) {
    if !recursive_search {
        return;
    }

    let dir_path = entry_data.path();
    if directories.is_excluded(&dir_path) {
        return;
    }

    if excluded_items.is_excluded(&dir_path) {
        return;
    }

    #[cfg(target_family = "unix")]
    if directories.exclude_other_filesystems() {
        match directories.is_on_other_filesystems(&dir_path) {
            Ok(true) => return,
            Err(e) => warnings.push(e),
            _ => (),
        }
    }

    dir_result.push(dir_path);
}

fn process_symlink_in_symlink_mode(
    entry_data: &DirEntry,
    warnings: &mut Vec<String>,
    fe_result: &mut Vec<FileEntry>,
    extensions: &Extensions,
    directories: &Directories,
    excluded_items: &ExcludedItems,
) {
    if !extensions.check_if_entry_have_valid_extension(entry_data) {
        return;
    }

    let current_file_name = entry_data.path();
    if excluded_items.is_excluded(&current_file_name) {
        return;
    }

    #[cfg(target_family = "unix")]
    if directories.exclude_other_filesystems() {
        match directories.is_on_other_filesystems(&current_file_name) {
            Ok(true) => return,
            Err(e) => warnings.push(e),
            _ => (),
        }
    }

    let Some(metadata) = common_get_metadata_dir(entry_data, warnings, &current_file_name) else {
        return;
    };

    // Creating new file entry
    let fe: FileEntry = FileEntry {
        size: metadata.len(),
        modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
        path: current_file_name,
    };

    fe_result.push(fe);
}

pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<Vec<Result<DirEntry, std::io::Error>>> {
    match fs::read_dir(current_folder) {
        Ok(t) => {
            // Make directory traversal order stable
            let mut r: Vec<_> = t.collect();
            r.sort_by_key(|d| match d {
                Ok(f) => f.path(),
                _ => PathBuf::new(),
            });
            Some(r)
        }
        Err(e) => {
            warnings.push(flc!("core_cannot_open_dir", dir = current_folder.to_string_lossy().to_string(), reason = e.to_string()));
            None
        }
    }
}

pub fn common_get_entry_data<'a>(entry: &'a Result<DirEntry, std::io::Error>, warnings: &mut Vec<String>, current_folder: &Path) -> Option<&'a DirEntry> {
    let entry_data = match entry {
        Ok(t) => t,
        Err(e) => {
            warnings.push(flc!(
                "core_cannot_read_entry_dir",
                dir = current_folder.to_string_lossy().to_string(),
                reason = e.to_string()
            ));
            return None;
        }
    };
    Some(entry_data)
}

pub fn common_get_metadata_dir(entry_data: &DirEntry, warnings: &mut Vec<String>, current_folder: &Path) -> Option<Metadata> {
    let metadata: Metadata = match entry_data.metadata() {
        Ok(t) => t,
        Err(e) => {
            warnings.push(flc!(
                "core_cannot_read_metadata_dir",
                dir = current_folder.to_string_lossy().to_string(),
                reason = e.to_string()
            ));
            return None;
        }
    };
    Some(metadata)
}

pub fn common_get_entry_data_metadata<'a>(entry: &'a Result<DirEntry, std::io::Error>, warnings: &mut Vec<String>, current_folder: &Path) -> Option<(&'a DirEntry, Metadata)> {
    let entry_data = common_get_entry_data(entry, warnings, current_folder)?;
    let metadata = common_get_metadata_dir(entry_data, warnings, current_folder)?;
    Some((entry_data, metadata))
}

pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, current_file_name: &Path, is_folder: bool) -> u64 {
    match metadata.modified() {
        Ok(t) => match t.duration_since(UNIX_EPOCH) {
            Ok(d) => d.as_secs(),
            Err(_inspected) => {
                if is_folder {
                    warnings.push(flc!("core_folder_modified_before_epoch", name = current_file_name.to_string_lossy().to_string()));
                } else {
                    warnings.push(flc!("core_file_modified_before_epoch", name = current_file_name.to_string_lossy().to_string()));
                }
                0
            }
        },
        Err(e) => {
            if is_folder {
                warnings.push(flc!(
                    "core_folder_no_modification_date",
                    name = current_file_name.to_string_lossy().to_string(),
                    reason = e.to_string()
                ));
            } else {
                warnings.push(flc!(
                    "core_file_no_modification_date",
                    name = current_file_name.to_string_lossy().to_string(),
                    reason = e.to_string()
                ));
            }
            0
        }
    }
}

#[cfg(target_family = "windows")]
pub fn inode(_fe: &FileEntry) -> Option<u64> {
    None
}

#[cfg(target_family = "unix")]
pub fn inode(fe: &FileEntry) -> Option<u64> {
    if let Ok(meta) = fs::metadata(&fe.path) {
        Some(meta.ino())
    } else {
        None
    }
}

pub fn take_1_per_inode((k, mut v): (Option<u64>, Vec<FileEntry>)) -> Vec<FileEntry> {
    if k.is_some() {
        v.drain(1..);
    }
    v
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;
    use std::fs::File;
    use std::io::prelude::*;
    use std::time::{Duration, SystemTime};
    use std::{fs, io};

    use once_cell::sync::Lazy;
use tempfile::TempDir; use super::*; use crate::common_tool::*; impl CommonData for CommonToolData { fn get_cd(&self) -> &CommonToolData { self } fn get_cd_mut(&mut self) -> &mut CommonToolData { self } } static NOW: Lazy = Lazy::new(|| SystemTime::UNIX_EPOCH + Duration::new(100, 0)); const CONTENT: &[u8; 1] = b"a"; fn create_files(dir: &TempDir) -> io::Result<(PathBuf, PathBuf, PathBuf)> { let (src, hard, other) = (dir.path().join("a"), dir.path().join("b"), dir.path().join("c")); let mut file = File::create(&src)?; file.write_all(CONTENT)?; fs::hard_link(&src, &hard)?; file.set_modified(*NOW)?; let mut file = File::create(&other)?; file.write_all(CONTENT)?; file.set_modified(*NOW)?; Ok((src, hard, other)) } #[test] fn test_traversal() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, hard, other) = create_files(&dir)?; let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).expect("Cannot fail calculating duration since epoch").as_secs(); let mut common_data = CommonToolData::new(ToolType::SimilarImages); common_data.directories.set_included_directory([dir.path().to_owned()].to_vec()); common_data.set_minimal_file_size(0); match DirTraversalBuilder::new().group_by(|_fe| ()).common_data(&common_data).build().run() { DirTraversalResult::SuccessFiles { warnings: _, grouped_file_entries, } => { let actual: HashSet<_> = grouped_file_entries.into_values().flatten().collect(); assert_eq!( HashSet::from([ FileEntry { path: src, size: 1, modified_date: secs, }, FileEntry { path: hard, size: 1, modified_date: secs, }, FileEntry { path: other, size: 1, modified_date: secs, }, ]), actual ); } _ => { panic!("Expect SuccessFiles."); } }; Ok(()) } #[cfg(target_family = "unix")] #[test] fn test_traversal_group_by_inode() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, _, other) = create_files(&dir)?; let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).expect("Cannot fail calculating duration since epoch").as_secs(); let mut common_data = CommonToolData::new(ToolType::SimilarImages); common_data.directories.set_included_directory([dir.path().to_owned()].to_vec()); common_data.set_minimal_file_size(0); match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() { DirTraversalResult::SuccessFiles { warnings: _, grouped_file_entries, } => { let actual: HashSet<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect(); assert_eq!( HashSet::from([ FileEntry { path: src, size: 1, modified_date: secs, }, FileEntry { path: other, size: 1, modified_date: secs, }, ]), actual ); } _ => { panic!("Expect SuccessFiles."); } }; Ok(()) } #[cfg(target_family = "windows")] #[test] fn test_traversal_group_by_inode() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, hard, other) = create_files(&dir)?; let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).expect("Cannot fail duration from epoch").as_secs(); let mut common_data = CommonToolData::new(ToolType::SimilarImages); common_data.directories.set_included_directory([dir.path().to_owned()].to_vec()); common_data.set_minimal_file_size(0); match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() { DirTraversalResult::SuccessFiles { warnings: _, grouped_file_entries, } => { let actual: HashSet<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect(); assert_eq!( HashSet::from([ FileEntry { path: src, size: 1, modified_date: secs, }, FileEntry { path: hard, size: 1, modified_date: secs, }, FileEntry 
{ path: other, size: 1, modified_date: secs, }, ]), actual ); } _ => { panic!("Expect SuccessFiles."); } }; Ok(()) } } czkawka_core-8.0.0/src/common_directory.rs000064400000000000000000000261731046102023000170160ustar 00000000000000use std::path::{Path, PathBuf}; #[cfg(target_family = "unix")] use std::{fs, os::unix::fs::MetadataExt}; use crate::common::normalize_windows_path; use crate::common_messages::Messages; use crate::flc; #[derive(Debug, Clone, Default)] pub struct Directories { pub excluded_directories: Vec, pub included_directories: Vec, pub reference_directories: Vec, pub exclude_other_filesystems: Option, #[cfg(target_family = "unix")] pub included_dev_ids: Vec, } impl Directories { pub fn new() -> Self { Default::default() } pub fn set_reference_directory(&mut self, reference_directory: &[PathBuf]) -> Messages { let mut messages: Messages = Messages::new(); self.reference_directories = reference_directory .iter() .filter_map(|directory| { let (dir, msg) = Self::canonicalize_and_clear_path(directory, false); messages.extend_with_another_messages(msg); dir }) .collect::>(); messages } pub fn set_included_directory(&mut self, included_directory: Vec) -> Messages { let mut messages: Messages = Messages::new(); if included_directory.is_empty() { messages.errors.push(flc!("core_missing_no_chosen_included_directory")); return messages; } let directories: Vec = included_directory; let mut checked_directories: Vec = Vec::new(); for directory in directories { let (dir, msg) = Self::canonicalize_and_clear_path(&directory, false); messages.extend_with_another_messages(msg); if let Some(dir) = dir { checked_directories.push(dir); } } if checked_directories.is_empty() { messages.warnings.push(flc!("core_included_directory_zero_valid_directories")); return messages; } self.included_directories = checked_directories; messages } pub fn set_excluded_directory(&mut self, excluded_directory: Vec) -> Messages { let mut messages: Messages = Messages::new(); if excluded_directory.is_empty() { return messages; } let directories: Vec = excluded_directory; let mut checked_directories: Vec = Vec::new(); for directory in directories { let directory_as_string = directory.to_string_lossy(); if directory_as_string == "/" { messages.errors.push(flc!("core_excluded_directory_pointless_slash")); break; } let (dir, msg) = Self::canonicalize_and_clear_path(&directory, true); messages.extend_with_another_messages(msg); if let Some(dir) = dir { checked_directories.push(dir); } } self.excluded_directories = checked_directories; messages } fn canonicalize_and_clear_path(directory: &Path, is_excluded: bool) -> (Option, Messages) { let mut messages = Messages::new(); let mut directory = directory.to_path_buf(); if !directory.exists() { if !is_excluded { messages.warnings.push(flc!("core_directory_must_exists", path = directory.to_string_lossy().to_string())); } return (None, messages); } if !directory.is_dir() { messages .warnings .push(flc!("core_directory_must_be_directory", path = directory.to_string_lossy().to_string())); return (None, messages); } // Try to canonicalize them if cfg!(windows) { // Only canonicalize if it's not a network path // This can be done by checking if path starts with \\?\UNC\ if let Ok(dir_can) = directory.canonicalize() { let dir_can_str = dir_can.to_string_lossy().to_string(); if let Some(dir_can_str) = dir_can_str.strip_prefix(r"\\?\") { if dir_can_str.chars().nth(1) == Some(':') { directory = PathBuf::from(dir_can_str); } } } } else { if let Ok(dir) = directory.canonicalize() { 
                directory = dir;
            }
        }

        (Some(directory), messages)
    }

    #[cfg(target_family = "unix")]
    pub fn set_exclude_other_filesystems(&mut self, exclude_other_filesystems: bool) {
        self.exclude_other_filesystems = Some(exclude_other_filesystems);
    }

    pub fn optimize_directories(&mut self, recursive_search: bool) -> Messages {
        let mut messages: Messages = Messages::new();

        let mut optimized_included: Vec<PathBuf> = Vec::new();
        let mut optimized_excluded: Vec<PathBuf> = Vec::new();

        if cfg!(target_family = "windows") {
            self.included_directories = self.included_directories.iter().map(normalize_windows_path).collect();
            self.excluded_directories = self.excluded_directories.iter().map(normalize_windows_path).collect();
            self.reference_directories = self.reference_directories.iter().map(normalize_windows_path).collect();
        }

        // Remove duplicated entries like: "/", "/"
        self.excluded_directories.sort_unstable();
        self.included_directories.sort_unstable();
        self.reference_directories.sort_unstable();
        self.excluded_directories.dedup();
        self.included_directories.dedup();
        self.reference_directories.dedup();

        // Optimize duplicated included directories - e.g. reduce "/", "/home", "/home/Pulpit" to just "/".
        // Do not use this when recursive search is disabled or when other filesystems are excluded.
        if recursive_search && !self.exclude_other_filesystems.unwrap_or(false) {
            // This is the only step which cannot be done when recursive search is disabled.
            let mut is_inside: bool;
            for ed_checked in &self.excluded_directories {
                is_inside = false;
                for ed_help in &self.excluded_directories {
                    if ed_checked == ed_help {
                        // We are checking the same element
                        continue;
                    }
                    if ed_checked.starts_with(ed_help) {
                        is_inside = true;
                        break;
                    }
                }
                if !is_inside {
                    optimized_excluded.push(ed_checked.clone());
                }
            }

            for id_checked in &self.included_directories {
                is_inside = false;
                for id_help in &self.included_directories {
                    if id_checked == id_help {
                        // We are checking the same element
                        continue;
                    }
                    if id_checked.starts_with(id_help) {
                        is_inside = true;
                        break;
                    }
                }
                if !is_inside {
                    optimized_included.push(id_checked.clone());
                }
            }

            self.included_directories = optimized_included;
            optimized_included = Vec::new();
            self.excluded_directories = optimized_excluded;
            optimized_excluded = Vec::new();
        }

        // Remove included directories which are inside any excluded directory
        for id in &self.included_directories {
            let mut is_inside: bool = false;
            for ed in &self.excluded_directories {
                if id.starts_with(ed) {
                    is_inside = true;
                    break;
                }
            }
            if !is_inside {
                optimized_included.push(id.clone());
            }
        }
        self.included_directories = optimized_included;
        optimized_included = Vec::new();

        // Remove non-existent directories
        for id in &self.included_directories {
            let path = Path::new(id);
            if path.exists() {
                optimized_included.push(id.clone());
            }
        }
        for ed in &self.excluded_directories {
            let path = Path::new(ed);
            if path.exists() {
                optimized_excluded.push(ed.clone());
            }
        }
        self.included_directories = optimized_included;
        self.excluded_directories = optimized_excluded;
        optimized_excluded = Vec::new();

        // Excluded paths must be inside an included path, because otherwise they change nothing
        for ed in &self.excluded_directories {
            let mut is_inside: bool = false;
            for id in &self.included_directories {
                if ed.starts_with(id) {
                    is_inside = true;
                    break;
                }
            }
            if is_inside {
                optimized_excluded.push(ed.clone());
            }
        }
        self.excluded_directories = optimized_excluded;

        // Selecting reference folders
        {
            let mut ref_folders = Vec::new();
            for folder in &self.reference_directories {
                if self.included_directories.iter().any(|e| folder.starts_with(e)) {
                    ref_folders.push(folder.clone());
                }
            }
            self.reference_directories = ref_folders;
        }

        if self.included_directories.is_empty() {
            messages.errors.push(flc!("core_directory_overlap"));
            return messages;
        }

        // Not needed, but it is better to have everything sorted
        self.excluded_directories.sort_unstable();
        self.included_directories.sort_unstable();

        // Get device IDs for included directories
        #[cfg(target_family = "unix")]
        if self.exclude_other_filesystems() {
            for d in &self.included_directories {
                match fs::metadata(d) {
                    Ok(m) => self.included_dev_ids.push(m.dev()),
                    Err(_) => messages.errors.push(flc!("core_directory_unable_to_get_device_id", path = d.to_string_lossy().to_string())),
                }
            }
        }

        messages
    }

    pub fn is_in_referenced_directory(&self, path: &Path) -> bool {
        self.reference_directories.iter().any(|e| path.starts_with(e))
    }

    pub fn is_excluded(&self, path: &Path) -> bool {
        #[cfg(target_family = "windows")]
        let path = normalize_windows_path(path);
        // We're assuming that `excluded_directories` are already normalized
        self.excluded_directories.iter().any(|p| p.as_path() == path)
    }

    #[cfg(target_family = "unix")]
    pub fn exclude_other_filesystems(&self) -> bool {
        self.exclude_other_filesystems.unwrap_or(false)
    }

    #[cfg(target_family = "unix")]
    pub fn is_on_other_filesystems(&self, path: impl AsRef<Path>) -> Result<bool, String> {
        let path = path.as_ref();
        match fs::metadata(path) {
            Ok(m) => Ok(!self.included_dev_ids.iter().any(|&id| id == m.dev())),
            Err(_) => Err(flc!("core_directory_unable_to_get_device_id", path = path.to_string_lossy().to_string())),
        }
    }
}
czkawka_core-8.0.0/src/common_extensions.rs000064400000000000000000000116651046102023000172070ustar 00000000000000use std::collections::HashSet;
use std::fs::DirEntry;

use crate::common_messages::Messages;

#[derive(Debug, Clone, Default)]
pub struct Extensions {
    allowed_extensions_hashset: HashSet<String>,
    excluded_extensions_hashset: HashSet<String>,
}

impl Extensions {
    pub fn new() -> Self {
        Default::default()
    }

    pub fn filter_extensions(mut file_extensions: String) -> (HashSet<String>, Messages) {
        let mut messages = Messages::new();
        let mut extensions_hashset = HashSet::new();

        if file_extensions.trim().is_empty() {
            return (Default::default(), messages);
        }

        file_extensions = file_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,hdr,svg");
        file_extensions = file_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
        file_extensions = file_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");
        file_extensions = file_extensions.replace("TEXT", "txt,doc,docx,odt,rtf");

        let extensions: Vec<String> = file_extensions.split(',').map(str::trim).map(String::from).collect();
        for mut extension in extensions {
            if extension.is_empty() || extension.replace(['.', ' '], "").trim().is_empty() {
                continue;
            }

            if extension.starts_with('.') {
                extension = extension.chars().skip(1).collect::<String>();
            }

            if extension.contains('.') {
                messages.warnings.push(format!("{extension} is not a valid extension because it contains a dot inside"));
                continue;
            }

            if extension.contains(' ') {
                messages.warnings.push(format!("{extension} is not a valid extension because it contains a space inside"));
                continue;
            }

            extensions_hashset.insert(extension);
        }

        (extensions_hashset, messages)
    }

    /// List of allowed extensions - only files with these extensions will be checked for duplicates.
    /// After filtering, extensions cannot contain any dots, commas, etc.
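    /// A usage sketch (illustrative only, inferred from `filter_extensions` above; the
    /// module path assumes the crate exports this file as `czkawka_core::common_extensions`):
    /// ```ignore
    /// use czkawka_core::common_extensions::Extensions;
    ///
    /// let mut ext = Extensions::new();
    /// // ".png" loses its leading dot, the blank entry is skipped,
    /// // and "IMAGE" expands to the whole image group listed above
    /// let messages = ext.set_allowed_extensions("jpg, .png, ,IMAGE".to_string());
    /// assert!(messages.warnings.is_empty());
    /// ```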
    pub fn set_allowed_extensions(&mut self, allowed_extensions: String) -> Messages {
        let (extensions, messages) = Self::filter_extensions(allowed_extensions);
        self.allowed_extensions_hashset = extensions;
        messages
    }

    pub fn set_excluded_extensions(&mut self, excluded_extensions: String) -> Messages {
        let (extensions, messages) = Self::filter_extensions(excluded_extensions);
        self.excluded_extensions_hashset = extensions;
        messages
    }

    pub fn check_if_entry_have_valid_extension(&self, entry_data: &DirEntry) -> bool {
        if self.allowed_extensions_hashset.is_empty() && self.excluded_extensions_hashset.is_empty() {
            return true;
        }

        // Using entry_data.path().extension() is a lot slower, even 5 times
        let file_name = entry_data.file_name();
        let Some(file_name_str) = file_name.to_str() else { return false };
        let Some(extension_idx) = file_name_str.rfind('.') else { return false };
        let extension = &file_name_str[extension_idx + 1..];

        if !self.allowed_extensions_hashset.is_empty() {
            if extension.chars().all(|c| c.is_ascii_lowercase()) {
                self.allowed_extensions_hashset.contains(extension)
            } else {
                self.allowed_extensions_hashset.contains(&extension.to_lowercase())
            }
        } else {
            if extension.chars().all(|c| c.is_ascii_lowercase()) {
                !self.excluded_extensions_hashset.contains(extension)
            } else {
                !self.excluded_extensions_hashset.contains(&extension.to_lowercase())
            }
        }
    }

    pub fn set_any_extensions(&self) -> bool {
        !self.allowed_extensions_hashset.is_empty()
    }

    fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) {
        for extension in file_extensions {
            let extension_without_dot = extension.trim_start_matches('.');
            self.allowed_extensions_hashset.insert(extension_without_dot.to_string());
        }
    }

    // E.g. when using similar videos, the user can provide extensions like "mp4,flv", but if the user provides "mp4,jpg",
    // then only "mp4" will be kept, because "jpg" is not a valid extension for videos
    fn union_allowed_extensions(&mut self, file_extensions: &[&str]) {
        let mut new_extensions = HashSet::new();
        for extension in file_extensions {
            let extension_without_dot = extension.trim_start_matches('.');
            new_extensions.insert(extension_without_dot.to_string());
        }
        // Keep only the extensions which are also valid for this tool
        self.allowed_extensions_hashset.retain(|e| new_extensions.contains(e));
    }

    pub fn set_and_validate_allowed_extensions(&mut self, file_extensions: &[&str]) {
        if self.allowed_extensions_hashset.is_empty() {
            self.extend_allowed_extensions(file_extensions);
        } else {
            self.union_allowed_extensions(file_extensions);
        }
    }
}
czkawka_core-8.0.0/src/common_image.rs000064400000000000000000000204041046102023000160630ustar 00000000000000#![allow(unused_imports)]
// I don't wanna fight with unused(heif) imports in this file, so simply ignore it to avoid too much complexity
use std::cmp::Ordering;
use std::ffi::OsString;
use std::fs::{DirEntry, File, OpenOptions};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::{atomic, Arc};
use std::thread::{sleep, JoinHandle};
use std::time::{Duration, Instant, SystemTime};
use std::{fs, panic, thread};

use anyhow::anyhow;
use crossbeam_channel::Sender;
use directories_next::ProjectDirs;
use fun_time::fun_time;
use handsome_logger::{ColorChoice, ConfigBuilder, TerminalMode};
use image::{DynamicImage, ImageBuffer, Rgb, Rgba};
use imagepipe::{ImageSource, Pipeline};
use jxl_oxide::image::BitDepth;
use jxl_oxide::{JxlImage, PixelFormat};
#[cfg(feature = "heif")]
use libheif_rs::{ColorSpace, HeifContext, RgbChroma};
#[cfg(feature = "libraw")]
use libraw::Processor;
use log::{debug, error, info, warn, LevelFilter, Record};
use rawloader::RawLoader;
use symphonia::core::conv::IntoSample;

use crate::common;
use crate::common::{create_crash_message, HEIC_EXTENSIONS, IMAGE_RS_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, JXL_IMAGE_EXTENSIONS, RAW_IMAGE_EXTENSIONS};

// #[cfg(feature = "heif")]
// use libheif_rs::LibHeif;

// TODO this code is ugly - this should exist in image-rs or be taken from the official example of jxl-oxide
// Its presence offends everything good in this world
pub fn get_jxl_image(path: &str) -> anyhow::Result<DynamicImage> {
    let buf_reader = std::io::BufReader::new(File::open(path)?);
    let decoder = JxlImage::builder().read(buf_reader).map_err(|e| anyhow::anyhow!("Failed to read jxl file {e}"))?;
    let width = decoder.width();
    let height = decoder.height();
    let frame = decoder.render_frame(0).map_err(|e| anyhow::anyhow!("Failed to render jxl frame {e}"))?;
    let planar = &frame.image_planar();
    let pixfmt = decoder.pixel_format();
    let bits_per_sample = decoder.image_header().metadata.bit_depth;

    if bits_per_sample.bits_per_sample() == 8 && pixfmt == PixelFormat::Rgb && planar.len() == 3 {
        let zips = planar[0].buf().iter().zip(planar[1].buf().iter()).zip(planar[2].buf().iter());
        let pixels = zips.flat_map(|((r, g), b)| [(r * 255.0) as u8, (g * 255.0) as u8, (b * 255.0) as u8]).collect::<Vec<_>>();
        let data = ImageBuffer::<Rgb<u8>, Vec<u8>>::from_vec(width, height, pixels)
            .ok_or_else(|| anyhow::anyhow!("Failed to create rgb image buffer from jxl data"))?;
        Ok(DynamicImage::ImageRgb8(data))
    } else if bits_per_sample.bits_per_sample() == 8 && pixfmt == PixelFormat::Rgba && planar.len() == 4 {
        let zips = planar[0].buf().iter().zip(planar[1].buf().iter()).zip(planar[2].buf().iter()).zip(planar[3].buf().iter());
        let pixels = zips
            .flat_map(|(((r, g), b), a)| [(r * 255.0) as u8, (g * 255.0) as u8, (b * 255.0) as u8, (a * 255.0) as u8])
            .collect::<Vec<_>>();
        let data = ImageBuffer::<Rgba<u8>, Vec<u8>>::from_vec(width, height, pixels)
            .ok_or_else(|| anyhow::anyhow!("Failed to create rgba image buffer from jxl data"))?;
        Ok(DynamicImage::ImageRgba8(data))
    } else {
        Err(anyhow::anyhow!("Unsupported number of planes: {}", planar.len()))
    }
}

pub fn get_dynamic_image_from_path(path: &str) -> Result<DynamicImage, String> {
    let path_lower = Path::new(path).extension().unwrap_or_default().to_string_lossy().to_lowercase();

    let res = panic::catch_unwind(|| {
        if HEIC_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext)) {
            #[cfg(feature = "heif")]
            {
                get_dynamic_image_from_heic(path).map_err(|e| format!("Cannot open heic file \"{path}\": {e}"))
            }
            #[cfg(not(feature = "heif"))]
            {
                image::open(path).map_err(|e| format!("Cannot open image file \"{path}\": {e}"))
            }
        } else if JXL_IMAGE_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext)) {
            get_jxl_image(path).map_err(|e| format!("Cannot open jxl image file \"{path}\": {e}"))
        } else if RAW_IMAGE_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext)) {
            get_raw_image(path).map_err(|e| format!("Cannot open raw image file \"{path}\": {e}"))
        } else {
            image::open(path).map_err(|e| format!("Cannot open image file \"{path}\": {e}"))
        }
    });

    if let Ok(res) = res {
        match res {
            Ok(t) => Ok(t),
            Err(e) => Err(format!("Cannot open image file \"{path}\": {e}")),
        }
    } else {
        let message = create_crash_message("Image-rs or libraw-rs or jxl-oxide", path, "https://github.com/image-rs/image/issues");
        println!("{message}");
        Err(message)
    }
}

#[cfg(feature = "heif")]
pub fn get_dynamic_image_from_heic(path: &str) -> anyhow::Result<DynamicImage> {
    // let libheif = LibHeif::new();
    let im = HeifContext::read_from_file(path)?;
    let handle = im.primary_image_handle()?;
    // let image = libheif.decode(&handle, ColorSpace::Rgb(RgbChroma::Rgb), None)?; // Enable when using libheif 0.19
    let image = handle.decode(ColorSpace::Rgb(RgbChroma::Rgb), None)?;
    let width = image.width();
    let height = image.height();
    let planes = image.planes();
    let interleaved_plane = planes.interleaved.ok_or_else(|| anyhow::anyhow!("Failed to get interleaved plane"))?;
    ImageBuffer::from_raw(width, height, interleaved_plane.data.to_owned())
        .map(DynamicImage::ImageRgb8)
        .ok_or_else(|| anyhow::anyhow!("Failed to create image buffer"))
}

#[cfg(feature = "libraw")]
pub fn get_raw_image(path: impl AsRef<Path>) -> anyhow::Result<DynamicImage> {
    let buf = fs::read(path.as_ref())?;

    let processor = Processor::new();
    let processed = processor.process_8bit(&buf)?;

    let width = processed.width();
    let height = processed.height();

    let data = processed.to_vec();
    let data_len = data.len();

    let buffer = ImageBuffer::from_raw(width, height, data).ok_or(anyhow::anyhow!(format!(
        "Cannot create ImageBuffer from raw image with width: {width} and height: {height} and data length: {data_len}",
    )))?;

    Ok(DynamicImage::ImageRgb8(buffer))
}

#[cfg(not(feature = "libraw"))]
pub fn get_raw_image(path: impl AsRef<Path> + std::fmt::Debug) -> Result<DynamicImage, String> {
    let mut start_timer = Instant::now();
    let mut times = Vec::new();

    let loader = RawLoader::new();
    let raw = loader.decode_file(path.as_ref()).map_err(|e| format!("Error decoding file: {e:?}"))?;
    times.push(("After decoding", start_timer.elapsed()));
    start_timer = Instant::now();

    let source = ImageSource::Raw(raw);
    times.push(("After creating source", start_timer.elapsed()));
    start_timer = Instant::now();

    let mut pipeline = Pipeline::new_from_source(source).map_err(|e| format!("Error creating pipeline: {e:?}"))?;
    times.push(("After creating pipeline", start_timer.elapsed()));
    start_timer = Instant::now();

    pipeline.run(None);
    let image = pipeline.output_8bit(None).map_err(|e| format!("Error running pipeline: {e:?}"))?;
    times.push(("After creating image", start_timer.elapsed()));
    start_timer = Instant::now();

    let image = ImageBuffer::<Rgb<u8>, Vec<u8>>::from_raw(image.width as u32, image.height as u32, image.data)
        .ok_or_else(|| "Failed to create image buffer".to_string())?;
    times.push(("After creating image buffer", start_timer.elapsed()));
    start_timer = Instant::now();

    let res = DynamicImage::ImageRgb8(image);
    times.push(("After creating dynamic image", start_timer.elapsed()));

    let str_timer = times.into_iter().map(|(name, time)| format!("{name}: {time:?}")).collect::<Vec<_>>().join(", ");
    debug!("Loading raw image --- {str_timer}");
    Ok(res)
}

pub fn check_if_can_display_image(path: &str) -> bool {
    let Some(extension) = Path::new(path).extension() else {
        return false;
    };
    let extension_str = extension.to_string_lossy().to_lowercase();
    #[cfg(feature = "heif")]
    let allowed_extensions = &[IMAGE_RS_EXTENSIONS, RAW_IMAGE_EXTENSIONS, JXL_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat();
    #[cfg(not(feature = "heif"))]
    let allowed_extensions = &[IMAGE_RS_EXTENSIONS, RAW_IMAGE_EXTENSIONS, JXL_IMAGE_EXTENSIONS].concat();

    allowed_extensions.iter().any(|ext| extension_str.ends_with(ext))
}
czkawka_core-8.0.0/src/common_items.rs000064400000000000000000000076351046102023000161330ustar 00000000000000use std::path::Path;

#[cfg(not(target_family = "unix"))]
use crate::common::normalize_windows_path;
use crate::common::regex_check;
use crate::common_messages::Messages;

#[cfg(target_family = "unix")]
pub const DEFAULT_EXCLUDED_DIRECTORIES: &[&str] = &["/proc", "/dev", "/sys", "/run", "/snap"];
#[cfg(not(target_family = "unix"))]
pub const DEFAULT_EXCLUDED_DIRECTORIES: &[&str] =
&["C:\\Windows"]; #[cfg(target_family = "unix")] pub const DEFAULT_EXCLUDED_ITEMS: &str = "*/.git/*,*/node_modules/*,*/lost+found/*,*/Trash/*,*/.Trash-*/*,*/snap/*,/home/*/.cache/*"; #[cfg(not(target_family = "unix"))] pub const DEFAULT_EXCLUDED_ITEMS: &str = "*\\.git\\*,*\\node_modules\\*,*\\lost+found\\*,*:\\windows\\*,*:\\$RECYCLE.BIN\\*,*:\\$SysReset\\*,*:\\System Volume Information\\*,*:\\OneDriveTemp\\*,*:\\hiberfil.sys,*:\\pagefile.sys,*:\\swapfile.sys"; #[derive(Debug, Clone, Default)] pub struct ExcludedItems { expressions: Vec, connected_expressions: Vec, } #[derive(Debug, Clone, Default)] pub struct SingleExcludedItem { pub expression: String, pub expression_splits: Vec, pub unique_extensions_splits: Vec, } impl ExcludedItems { pub fn new() -> Self { Default::default() } pub fn new_from(excluded_items: Vec) -> Self { let mut s = Self::new(); s.set_excluded_items(excluded_items); s } pub fn set_excluded_items(&mut self, excluded_items: Vec) -> Messages { let mut warnings: Vec = Vec::new(); if excluded_items.is_empty() { return Messages::new(); } let expressions: Vec = excluded_items; let mut checked_expressions: Vec = Vec::new(); for expression in expressions { let expression: String = expression.trim().to_string(); if expression.is_empty() { continue; } #[cfg(target_family = "windows")] let expression = expression.replace("/", "\\"); if expression == "DEFAULT" { checked_expressions.push(DEFAULT_EXCLUDED_ITEMS.to_string()); continue; } if !expression.contains('*') { warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + expression.as_str()); continue; } checked_expressions.push(expression); } for checked_expression in &checked_expressions { let item = new_excluded_item(checked_expression); self.expressions.push(item.expression.clone()); self.connected_expressions.push(item); } Messages { messages: vec![], warnings, errors: vec![], } } pub fn get_excluded_items(&self) -> &Vec { &self.expressions } pub fn is_excluded(&self, path: &Path) -> bool { if self.connected_expressions.is_empty() { return false; } #[cfg(target_family = "windows")] let path = normalize_windows_path(path); let path_str = path.to_string_lossy(); for expression in &self.connected_expressions { if regex_check(expression, &path_str) { return true; } } false } } pub fn new_excluded_item(expression: &str) -> SingleExcludedItem { let expression = expression.trim().to_string(); let expression_splits: Vec = expression.split('*').filter_map(|e| if e.is_empty() { None } else { Some(e.to_string()) }).collect(); let mut unique_extensions_splits = expression_splits.clone(); unique_extensions_splits.sort(); unique_extensions_splits.dedup(); unique_extensions_splits.sort_by_key(|b| std::cmp::Reverse(b.len())); SingleExcludedItem { expression, expression_splits, unique_extensions_splits, } } czkawka_core-8.0.0/src/common_messages.rs000064400000000000000000000047311046102023000166150ustar 00000000000000#[derive(Debug, Default, Clone)] pub struct Messages { pub messages: Vec, pub warnings: Vec, pub errors: Vec, } impl Messages { pub fn new() -> Self { Default::default() } pub fn new_from_errors(errors: Vec) -> Self { Messages { errors, ..Default::default() } } pub fn new_from_warnings(warnings: Vec) -> Self { Messages { warnings, ..Default::default() } } pub fn new_from_messages(messages: Vec) -> Self { Messages { messages, ..Default::default() } } pub fn print_messages(&self) { println!("{}", self.create_messages_text()); } pub fn create_messages_text(&self) -> String { let mut 
    pub fn create_messages_text(&self) -> String {
        let mut text_to_return: String = String::new();

        if !self.messages.is_empty() {
            text_to_return += "-------------------------------MESSAGES--------------------------------\n";
            for i in &self.messages {
                text_to_return += i;
                text_to_return += "\n";
            }
            text_to_return += "---------------------------END OF MESSAGES-----------------------------\n";
        }

        if !self.warnings.is_empty() {
            text_to_return += "-------------------------------WARNINGS--------------------------------\n";
            for i in &self.warnings {
                text_to_return += i;
                text_to_return += "\n";
            }
            text_to_return += "---------------------------END OF WARNINGS-----------------------------\n";
        }

        if !self.errors.is_empty() {
            text_to_return += "--------------------------------ERRORS---------------------------------\n";
            for i in &self.errors {
                text_to_return += i;
                text_to_return += "\n";
            }
            text_to_return += "----------------------------END OF ERRORS------------------------------\n";
        }
        text_to_return
    }

    pub fn extend_messages_with(&mut self, messages: Vec<String>, warnings: Vec<String>, errors: Vec<String>) {
        self.messages.extend(messages);
        self.warnings.extend(warnings);
        self.errors.extend(errors);
    }

    pub fn extend_with_another_messages(&mut self, messages: Messages) {
        let (messages, warnings, errors) = (messages.messages, messages.warnings, messages.errors);
        self.messages.extend(messages);
        self.warnings.extend(warnings);
        self.errors.extend(errors);
    }
}
czkawka_core-8.0.0/src/common_tool.rs000064400000000000000000000204411046102023000157570ustar 00000000000000
use std::path::PathBuf;

use crate::common_dir_traversal::{CheckingMethod, ToolType};
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;

#[derive(Debug, Clone, Default)]
pub struct CommonToolData {
    pub(crate) tool_type: ToolType,
    pub(crate) text_messages: Messages,
    pub(crate) directories: Directories,
    pub(crate) extensions: Extensions,
    pub(crate) excluded_items: ExcludedItems,
    pub(crate) recursive_search: bool,
    pub(crate) delete_method: DeleteMethod,
    pub(crate) maximal_file_size: u64,
    pub(crate) minimal_file_size: u64,
    pub(crate) stopped_search: bool,
    pub(crate) use_cache: bool,
    pub(crate) delete_outdated_cache: bool,
    pub(crate) save_also_as_json: bool,
    pub(crate) use_reference_folders: bool,
    pub(crate) dry_run: bool,
}

#[derive(Eq, PartialEq, Clone, Debug, Copy, Default)]
pub enum DeleteMethod {
    #[default]
    None,
    Delete, // Just delete items
    AllExceptNewest,
    AllExceptOldest,
    OneOldest,
    OneNewest,
    HardLink,
    AllExceptBiggest,
    AllExceptSmallest,
    OneBiggest,
    OneSmallest,
}

impl CommonToolData {
    pub fn new(tool_type: ToolType) -> Self {
        Self {
            tool_type,
            text_messages: Messages::new(),
            directories: Directories::new(),
            extensions: Extensions::new(),
            excluded_items: ExcludedItems::new(),
            recursive_search: true,
            delete_method: DeleteMethod::None,
            maximal_file_size: u64::MAX,
            minimal_file_size: 8192,
            stopped_search: false,
            use_cache: true,
            delete_outdated_cache: true,
            save_also_as_json: false,
            use_reference_folders: false,
            dry_run: false,
        }
    }
}

pub trait CommonData {
    fn get_cd(&self) -> &CommonToolData;
    fn get_cd_mut(&mut self) -> &mut CommonToolData;

    fn get_check_method(&self) -> CheckingMethod {
        CheckingMethod::None
    }
    fn get_test_type(&self) -> (ToolType, CheckingMethod) {
        (self.get_cd().tool_type, self.get_check_method())
    }
    fn get_tool_type(&self) -> ToolType {
        self.get_cd().tool_type
    }

    fn set_dry_run(&mut self, dry_run: bool) {
        self.get_cd_mut().dry_run = dry_run;
    }
    fn get_dry_run(&self) -> bool {
        self.get_cd().dry_run
    }
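
    // The getters/setters below form the shared configuration surface of every
    // tool. A hedged usage sketch (the concrete tool is assumed, not fixed here):
    //
    //     let mut tool = EmptyFiles::new();
    //     tool.set_recursive_search(true);
    //     tool.set_minimal_file_size(1); // 0 is clamped to 1 by the setter below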
set_use_cache(&mut self, use_cache: bool) { self.get_cd_mut().use_cache = use_cache; } fn get_use_cache(&self) -> bool { self.get_cd().use_cache } fn set_delete_outdated_cache(&mut self, delete_outdated_cache: bool) { self.get_cd_mut().delete_outdated_cache = delete_outdated_cache; } fn get_delete_outdated_cache(&self) -> bool { self.get_cd().delete_outdated_cache } fn get_stopped_search(&self) -> bool { self.get_cd().stopped_search } fn set_stopped_search(&mut self, stopped_search: bool) { self.get_cd_mut().stopped_search = stopped_search; } fn set_maximal_file_size(&mut self, maximal_file_size: u64) { self.get_cd_mut().maximal_file_size = match maximal_file_size { 0 => 1, t => t, }; } fn get_maximal_file_size(&self) -> u64 { self.get_cd().maximal_file_size } fn set_minimal_file_size(&mut self, minimal_file_size: u64) { self.get_cd_mut().minimal_file_size = match minimal_file_size { 0 => 1, t => t, }; } fn get_minimal_file_size(&self) -> u64 { self.get_cd().minimal_file_size } fn set_reference_directory(&mut self, reference_directory: Vec) { let messages = self.get_cd_mut().directories.set_reference_directory(&reference_directory); self.get_cd_mut().text_messages.extend_with_another_messages(messages); } #[cfg(target_family = "unix")] fn set_exclude_other_filesystems(&mut self, exclude_other_filesystems: bool) { self.get_cd_mut().directories.set_exclude_other_filesystems(exclude_other_filesystems); } #[cfg(not(target_family = "unix"))] fn set_exclude_other_filesystems(&mut self, _exclude_other_filesystems: bool) {} fn get_text_messages(&self) -> &Messages { &self.get_cd().text_messages } fn get_text_messages_mut(&mut self) -> &mut Messages { &mut self.get_cd_mut().text_messages } fn set_save_also_as_json(&mut self, save_also_as_json: bool) { self.get_cd_mut().save_also_as_json = save_also_as_json; } fn get_save_also_as_json(&self) -> bool { self.get_cd().save_also_as_json } fn set_recursive_search(&mut self, recursive_search: bool) { self.get_cd_mut().recursive_search = recursive_search; } fn get_recursive_search(&self) -> bool { self.get_cd().recursive_search } fn set_use_reference_folders(&mut self, use_reference_folders: bool) { self.get_cd_mut().use_reference_folders = use_reference_folders; } fn get_use_reference_folders(&self) -> bool { self.get_cd().use_reference_folders } fn set_delete_method(&mut self, delete_method: DeleteMethod) { self.get_cd_mut().delete_method = delete_method; } fn get_delete_method(&self) -> DeleteMethod { self.get_cd().delete_method } fn set_included_directory(&mut self, included_directory: Vec) { let messages = self.get_cd_mut().directories.set_included_directory(included_directory); self.get_cd_mut().text_messages.extend_with_another_messages(messages); } fn set_excluded_directory(&mut self, excluded_directory: Vec) { let messages = self.get_cd_mut().directories.set_excluded_directory(excluded_directory); self.get_cd_mut().text_messages.extend_with_another_messages(messages); } fn set_allowed_extensions(&mut self, allowed_extensions: String) { let messages = self.get_cd_mut().extensions.set_allowed_extensions(allowed_extensions); self.get_cd_mut().text_messages.extend_with_another_messages(messages); } fn set_excluded_extensions(&mut self, excluded_extensions: String) { let messages = self.get_cd_mut().extensions.set_excluded_extensions(excluded_extensions); self.get_cd_mut().text_messages.extend_with_another_messages(messages); } fn set_excluded_items(&mut self, excluded_items: Vec) { let messages = 
    fn set_excluded_items(&mut self, excluded_items: Vec<String>) {
        let messages = self.get_cd_mut().excluded_items.set_excluded_items(excluded_items);
        self.get_cd_mut().text_messages.extend_with_another_messages(messages);
    }

    fn prepare_items(&mut self) {
        let recursive_search = self.get_cd().recursive_search;
        // Optimizes directories and removes recursive calls
        let messages = self.get_cd_mut().directories.optimize_directories(recursive_search);
        self.get_cd_mut().text_messages.extend_with_another_messages(messages);
    }

    fn debug_print_common(&self) {
        println!("---------------DEBUG PRINT COMMON---------------");
        println!("Tool type: {:?}", self.get_cd().tool_type);
        println!("Directories: {:?}", self.get_cd().directories);
        println!("Extensions: {:?}", self.get_cd().extensions);
        println!("Excluded items: {:?}", self.get_cd().excluded_items);
        println!("Recursive search: {}", self.get_cd().recursive_search);
        println!("Maximal file size: {}", self.get_cd().maximal_file_size);
        println!("Minimal file size: {}", self.get_cd().minimal_file_size);
        println!("Stopped search: {}", self.get_cd().stopped_search);
        println!("Use cache: {}", self.get_cd().use_cache);
        println!("Delete outdated cache: {}", self.get_cd().delete_outdated_cache);
        println!("Save also as json: {}", self.get_cd().save_also_as_json);
        println!("Delete method: {:?}", self.get_cd().delete_method);
        println!("Use reference folders: {}", self.get_cd().use_reference_folders);
        println!("Dry run: {}", self.get_cd().dry_run);
        println!("---------------DEBUG PRINT MESSAGES---------------");
        println!("Errors size - {}", self.get_cd().text_messages.errors.len());
        println!("Warnings size - {}", self.get_cd().text_messages.warnings.len());
        println!("Messages size - {}", self.get_cd().text_messages.messages.len());
    }
}
czkawka_core-8.0.0/src/common_traits.rs000064400000000000000000000061351046102023000163140ustar 00000000000000
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

use fun_time::fun_time;
use serde::Serialize;

pub trait DebugPrint {
    fn debug_print(&self);
}

pub trait PrintResults {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()>;

    #[fun_time(message = "print_results_to_output", level = "debug")]
    fn print_results_to_output(&self) {
        let stdout = std::io::stdout();
        let mut handle = stdout.lock();
        // Panics here are allowed, because it is used only in CLI
        self.write_results(&mut handle).expect("Error while writing to stdout");
        handle.flush().expect("Error while flushing stdout");
    }

    #[fun_time(message = "print_results_to_file", level = "debug")]
    fn print_results_to_file(&self, file_name: &str) -> std::io::Result<()> {
        let file_name: String = match file_name {
            "" => "results.txt".to_string(),
            k => k.to_string(),
        };

        let file_handler = File::create(file_name)?;
        let mut writer = BufWriter::new(file_handler);
        self.write_results(&mut writer)?;
        writer.flush()?;
        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()>;

    fn save_results_to_file_as_json_internal<T: Serialize>(&self, file_name: &str, item_to_serialize: &T, pretty_print: bool) -> std::io::Result<()> {
        if pretty_print {
            self.save_results_to_file_as_json_pretty(file_name, item_to_serialize)
        } else {
            self.save_results_to_file_as_json_compact(file_name, item_to_serialize)
        }
    }
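
    // Pretty JSON is easier to read and diff, compact JSON is smaller; both
    // variants below share the same `BufWriter` setup. Hedged example (file
    // name assumed): `tool.save_results_to_file_as_json("results.json", true)`
    // routes through the pretty variant.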
    #[fun_time(message = "save_results_to_file_as_json_pretty", level = "debug")]
    fn save_results_to_file_as_json_pretty<T: Serialize>(&self, file_name: &str, item_to_serialize: &T) -> std::io::Result<()> {
        let file_handler = File::create(file_name)?;
        let mut writer = BufWriter::new(file_handler);
        serde_json::to_writer_pretty(&mut writer, item_to_serialize)?;
        Ok(())
    }

    #[fun_time(message = "save_results_to_file_as_json_compact", level = "debug")]
    fn save_results_to_file_as_json_compact<T: Serialize>(&self, file_name: &str, item_to_serialize: &T) -> std::io::Result<()> {
        let file_handler = File::create(file_name)?;
        let mut writer = BufWriter::new(file_handler);
        serde_json::to_writer(&mut writer, item_to_serialize)?;
        Ok(())
    }

    fn save_all_in_one(&self, folder: &str, base_file_name: &str) -> std::io::Result<()> {
        let pretty_name = format!("{folder}/{base_file_name}_pretty.json");
        self.save_results_to_file_as_json(&pretty_name, true)?;

        let compact_name = format!("{folder}/{base_file_name}_compact.json");
        self.save_results_to_file_as_json(&compact_name, false)?;

        let txt_name = format!("{folder}/{base_file_name}.txt");
        self.print_results_to_file(&txt_name)?;

        Ok(())
    }
}

pub trait ResultEntry {
    fn get_path(&self) -> &Path;
    fn get_modified_date(&self) -> u64;
    fn get_size(&self) -> u64;
}
czkawka_core-8.0.0/src/duplicate.rs000064400000000000000000002012541046102023000154070ustar 00000000000000
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use std::fs::File;
use std::hash::Hasher;
use std::io::prelude::*;
use std::io::{self, Error, ErrorKind};
#[cfg(target_family = "unix")]
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;
use std::{fs, mem};

use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use xxhash_rust::xxh3::Xxh3;

use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_cache::{get_duplicate_cache_file, load_cache_from_file_generalized_by_size, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
use crate::progress_data::{CurrentStage, ProgressData};

const TEMP_HARDLINK_FILE: &str = "rzeczek.rxrxrxl";

#[derive(PartialEq, Eq, Clone, Debug, Copy, Default)]
pub enum HashType {
    #[default]
    Blake3,
    Crc32,
    Xxh3,
}

impl HashType {
    fn hasher(self: HashType) -> Box<dyn MyHasher> {
        match self {
            HashType::Blake3 => Box::new(blake3::Hasher::new()),
            HashType::Crc32 => Box::new(crc32fast::Hasher::new()),
            HashType::Xxh3 => Box::new(Xxh3::new()),
        }
    }
}

#[derive(Clone, Serialize, Deserialize, Debug, Default)]
pub struct DuplicateEntry {
    pub path: PathBuf,
    pub modified_date: u64,
    pub size: u64,
    pub hash: String,
}

impl ResultEntry for DuplicateEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

impl FileEntry {
    fn into_duplicate_entry(self) -> DuplicateEntry {
        DuplicateEntry {
            size: self.size,
            path: self.path,
            modified_date: self.modified_date,
            hash: String::new(),
        }
    }
}

#[derive(Default)]
pub struct Info {
    pub number_of_groups_by_size: usize,
    pub number_of_duplicated_files_by_size: usize,
    pub number_of_groups_by_hash: usize,
    pub number_of_duplicated_files_by_hash: usize,
    pub number_of_groups_by_name: usize,
    pub number_of_duplicated_files_by_name: usize,
    pub number_of_groups_by_size_name: usize,
    pub number_of_duplicated_files_by_size_name: usize,
    pub lost_space_by_size: u64,
    pub lost_space_by_hash: u64,
}
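
// A hedged construction sketch (argument values assumed, not from this file);
// the parameter order mirrors `DuplicateFinderParameters::new` below:
//
//     let params = DuplicateFinderParameters::new(
//         CheckingMethod::Hash,
//         HashType::Blake3,
//         true,  // ignore_hard_links
//         true,  // use_prehash_cache
//         0,     // minimal_cache_file_size
//         0,     // minimal_prehash_cache_file_size
//         false, // case_sensitive_name_comparison
//     );
//     let mut finder = DuplicateFinder::new(params);
pub struct DuplicateFinderParameters {
    pub check_method: CheckingMethod,
    pub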
hash_type: HashType, pub ignore_hard_links: bool, pub use_prehash_cache: bool, pub minimal_cache_file_size: u64, pub minimal_prehash_cache_file_size: u64, pub case_sensitive_name_comparison: bool, } impl DuplicateFinderParameters { pub fn new( check_method: CheckingMethod, hash_type: HashType, ignore_hard_links: bool, use_prehash_cache: bool, minimal_cache_file_size: u64, minimal_prehash_cache_file_size: u64, case_sensitive_name_comparison: bool, ) -> Self { Self { check_method, hash_type, ignore_hard_links, use_prehash_cache, minimal_cache_file_size, minimal_prehash_cache_file_size, case_sensitive_name_comparison, } } } pub struct DuplicateFinder { common_data: CommonToolData, information: Info, // File Size, File Entry files_with_identical_names: BTreeMap>, // File (Size, Name), File Entry files_with_identical_size_names: BTreeMap<(u64, String), Vec>, // File Size, File Entry files_with_identical_size: BTreeMap>, // File Size, next grouped by file size, next grouped by hash files_with_identical_hashes: BTreeMap>>, // File Size, File Entry files_with_identical_names_referenced: BTreeMap)>, // File (Size, Name), File Entry files_with_identical_size_names_referenced: BTreeMap<(u64, String), (DuplicateEntry, Vec)>, // File Size, File Entry files_with_identical_size_referenced: BTreeMap)>, // File Size, next grouped by file size, next grouped by hash files_with_identical_hashes_referenced: BTreeMap)>>, params: DuplicateFinderParameters, } impl DuplicateFinder { pub fn new(params: DuplicateFinderParameters) -> Self { Self { common_data: CommonToolData::new(ToolType::Duplicate), information: Info::default(), files_with_identical_names: Default::default(), files_with_identical_size: Default::default(), files_with_identical_size_names: Default::default(), files_with_identical_hashes: Default::default(), files_with_identical_names_referenced: Default::default(), files_with_identical_size_names_referenced: Default::default(), files_with_identical_size_referenced: Default::default(), files_with_identical_hashes_referenced: Default::default(), params, } } #[fun_time(message = "find_duplicates", level = "info")] pub fn find_duplicates(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) { self.prepare_items(); self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty(); match self.get_params().check_method { CheckingMethod::Name => { self.common_data.stopped_search = !self.check_files_name(stop_receiver, progress_sender); // TODO restore this to name if self.common_data.stopped_search { return; } } CheckingMethod::SizeName => { self.common_data.stopped_search = !self.check_files_size_name(stop_receiver, progress_sender); if self.common_data.stopped_search { return; } } CheckingMethod::Size => { self.common_data.stopped_search = !self.check_files_size(stop_receiver, progress_sender); if self.common_data.stopped_search { return; } } CheckingMethod::Hash => { self.common_data.stopped_search = !self.check_files_size(stop_receiver, progress_sender); if self.common_data.stopped_search { return; } self.common_data.stopped_search = !self.check_files_hash(stop_receiver, progress_sender); if self.common_data.stopped_search { return; } } _ => panic!(), } self.delete_files(); self.debug_print(); } #[fun_time(message = "check_files_name", level = "debug")] fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let group_by_func = if self.get_params().case_sensitive_name_comparison { 
|fe: &FileEntry| { fe.path .file_name() .unwrap_or_else(|| panic!("Found invalid file_name \"{}\"", fe.path.to_string_lossy())) .to_string_lossy() .to_string() } } else { |fe: &FileEntry| { fe.path .file_name() .unwrap_or_else(|| panic!("Found invalid file_name \"{}\"", fe.path.to_string_lossy())) .to_string_lossy() .to_lowercase() } }; let result = DirTraversalBuilder::new() .common_data(&self.common_data) .group_by(group_by_func) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(CheckingMethod::Name) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.common_data.text_messages.warnings.extend(warnings); // Create new BTreeMap without single size entries(files have not duplicates) self.files_with_identical_names = grouped_file_entries .into_iter() .filter_map(|(name, vector)| { if vector.len() > 1 { Some((name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect())) } else { None } }) .collect(); // Reference - only use in size, because later hash will be counted differently if self.common_data.use_reference_folders { let vec = mem::take(&mut self.files_with_identical_names) .into_iter() .filter_map(|(_name, vec_file_entry)| { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { None } else { files_from_referenced_folders.pop().map(|file| (file, normal_files)) } }) .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe)); } } self.calculate_name_stats(); true } DirTraversalResult::Stopped => false, } } fn calculate_name_stats(&mut self) { if self.common_data.use_reference_folders { for (_fe, vector) in self.files_with_identical_names_referenced.values() { self.information.number_of_duplicated_files_by_name += vector.len(); self.information.number_of_groups_by_name += 1; } } else { for vector in self.files_with_identical_names.values() { self.information.number_of_duplicated_files_by_name += vector.len() - 1; self.information.number_of_groups_by_name += 1; } } } #[fun_time(message = "check_files_size_name", level = "debug")] fn check_files_size_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let group_by_func = if self.get_params().case_sensitive_name_comparison { |fe: &FileEntry| { ( fe.size, fe.path .file_name() .unwrap_or_else(|| panic!("Found invalid file_name \"{}\"", fe.path.to_string_lossy())) .to_string_lossy() .to_string(), ) } } else { |fe: &FileEntry| { ( fe.size, fe.path .file_name() .unwrap_or_else(|| panic!("Found invalid file_name \"{}\"", fe.path.to_string_lossy())) .to_string_lossy() .to_lowercase(), ) } }; let result = DirTraversalBuilder::new() .common_data(&self.common_data) .group_by(group_by_func) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(CheckingMethod::SizeName) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.common_data.text_messages.warnings.extend(warnings); self.files_with_identical_size_names = grouped_file_entries .into_iter() .filter_map(|(size_name, vector)| { if vector.len() > 1 { Some((size_name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect())) } else { None } }) .collect(); // Reference - only use in size, because later hash will be counted 
differently if self.common_data.use_reference_folders { let vec = mem::take(&mut self.files_with_identical_size_names) .into_iter() .filter_map(|(_size, vec_file_entry)| { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { None } else { files_from_referenced_folders.pop().map(|file| (file, normal_files)) } }) .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_size_names_referenced .insert((fe.size, fe.path.to_string_lossy().to_string()), (fe, vec_fe)); } } self.calculate_size_name_stats(); true } DirTraversalResult::Stopped => false, } } fn calculate_size_name_stats(&mut self) { if self.common_data.use_reference_folders { for ((size, _name), (_fe, vector)) in &self.files_with_identical_size_names_referenced { self.information.number_of_duplicated_files_by_size_name += vector.len(); self.information.number_of_groups_by_size_name += 1; self.information.lost_space_by_size += (vector.len() as u64) * size; } } else { for ((size, _name), vector) in &self.files_with_identical_size_names { self.information.number_of_duplicated_files_by_size_name += vector.len() - 1; self.information.number_of_groups_by_size_name += 1; self.information.lost_space_by_size += (vector.len() as u64 - 1) * size; } } } #[fun_time(message = "check_files_size", level = "debug")] fn check_files_size(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() .common_data(&self.common_data) .group_by(|fe| fe.size) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(self.get_params().check_method) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.common_data.text_messages.warnings.extend(warnings); for (size, vec) in grouped_file_entries { if vec.len() <= 1 { continue; } let vector = if self.get_params().ignore_hard_links { filter_hard_links(&vec) } else { vec }; if vector.len() > 1 { self.files_with_identical_size .insert(size, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()); } } self.filter_reference_folders_by_size(); self.calculate_size_stats(); debug!( "check_file_size - after calculating size stats/duplicates, found in {} groups, {} files with same size | referenced {} groups, {} files", self.files_with_identical_size.len(), self.files_with_identical_size.values().map(Vec::len).sum::(), self.files_with_identical_size_referenced.len(), self.files_with_identical_size_referenced.values().map(|(_fe, vec)| vec.len()).sum::() ); true } DirTraversalResult::Stopped => false, } } fn calculate_size_stats(&mut self) { if self.common_data.use_reference_folders { for (size, (_fe, vector)) in &self.files_with_identical_size_referenced { self.information.number_of_duplicated_files_by_size += vector.len(); self.information.number_of_groups_by_size += 1; self.information.lost_space_by_size += (vector.len() as u64) * size; } } else { for (size, vector) in &self.files_with_identical_size { self.information.number_of_duplicated_files_by_size += vector.len() - 1; self.information.number_of_groups_by_size += 1; self.information.lost_space_by_size += (vector.len() as u64 - 1) * size; } } } #[fun_time(message = "filter_reference_folders_by_size", level = "debug")] fn filter_reference_folders_by_size(&mut self) { if self.common_data.use_reference_folders && 
self.get_params().check_method == CheckingMethod::Size { let vec = mem::take(&mut self.files_with_identical_size) .into_iter() .filter_map(|(_size, vec_file_entry)| { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { None } else { files_from_referenced_folders.pop().map(|file| (file, normal_files)) } }) .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe)); } } } #[fun_time(message = "prehash_load_cache_at_start", level = "debug")] fn prehash_load_cache_at_start(&mut self) -> (BTreeMap>, BTreeMap>, BTreeMap>) { // Cache algorithm // - Load data from cache // - Convert from BT> to BT // - Save to proper values let loaded_hash_map; let mut records_already_cached: BTreeMap> = Default::default(); let mut non_cached_files_to_check: BTreeMap> = Default::default(); if self.get_params().use_prehash_cache { let (messages, loaded_items) = load_cache_from_file_generalized_by_size::( &get_duplicate_cache_file(&self.get_params().hash_type, true), self.get_delete_outdated_cache(), &self.files_with_identical_size, ); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); Self::diff_loaded_and_prechecked_files( "prehash_load_cache_at_start", mem::take(&mut self.files_with_identical_size), &loaded_hash_map, &mut records_already_cached, &mut non_cached_files_to_check, ); } else { loaded_hash_map = Default::default(); mem::swap(&mut self.files_with_identical_size, &mut non_cached_files_to_check); } (loaded_hash_map, records_already_cached, non_cached_files_to_check) } #[fun_time(message = "prehash_save_cache_at_exit", level = "debug")] fn prehash_save_cache_at_exit( &mut self, loaded_hash_map: BTreeMap>, pre_hash_results: &Vec<(u64, BTreeMap>, Vec)>, ) { if self.get_params().use_prehash_cache { // All results = records already cached + computed results let mut save_cache_to_hashmap: BTreeMap = Default::default(); for (size, vec_file_entry) in loaded_hash_map { if size >= self.get_params().minimal_prehash_cache_file_size { for file_entry in vec_file_entry { save_cache_to_hashmap.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone()); } } } for (size, hash_map, _errors) in pre_hash_results { if *size >= self.get_params().minimal_prehash_cache_file_size { for vec_file_entry in hash_map.values() { for file_entry in vec_file_entry { save_cache_to_hashmap.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone()); } } } } let messages = save_cache_to_file_generalized( &get_duplicate_cache_file(&self.get_params().hash_type, true), &save_cache_to_hashmap, self.common_data.save_also_as_json, self.get_params().minimal_prehash_cache_file_size, ); self.get_text_messages_mut().extend_with_another_messages(messages); } } #[fun_time(message = "prehashing", level = "debug")] fn prehashing( &mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>, pre_checked_map: &mut BTreeMap>, ) -> bool { if self.files_with_identical_size.is_empty() { return true; } let check_type = self.get_params().hash_type; let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = prepare_thread_handler_common(progress_sender, CurrentStage::DuplicatePreHashCacheLoading, 0, self.get_test_type()); let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = 
self.prehash_load_cache_at_start(); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); if check_if_stop_received(stop_receiver) { return false; } let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common( progress_sender, CurrentStage::DuplicatePreHashing, non_cached_files_to_check.values().map(Vec::len).sum(), self.get_test_type(), ); debug!("Starting calculating prehash"); #[allow(clippy::type_complexity)] let pre_hash_results: Vec<(u64, BTreeMap>, Vec)> = non_cached_files_to_check .into_par_iter() .map(|(size, vec_file_entry)| { let mut hashmap_with_hash: BTreeMap> = Default::default(); let mut errors: Vec = Vec::new(); let mut buffer = [0u8; 1024 * 32]; atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } for mut file_entry in vec_file_entry { match hash_calculation(&mut buffer, &file_entry, check_type, 0) { Ok(hash_string) => { file_entry.hash = hash_string.clone(); hashmap_with_hash.entry(hash_string).or_default().push(file_entry); } Err(s) => errors.push(s), } } Some((size, hashmap_with_hash, errors)) }) .while_some() .collect(); debug!("Completed calculating prehash"); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Saving into cache let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = prepare_thread_handler_common(progress_sender, CurrentStage::DuplicatePreHashCacheSaving, 0, self.get_test_type()); // Add data from cache for (size, vec_file_entry) in &records_already_cached { pre_checked_map.entry(*size).or_default().append(&mut vec_file_entry.clone()); } // Check results for (size, hash_map, errors) in &pre_hash_results { if !errors.is_empty() { self.common_data.text_messages.warnings.append(&mut errors.clone()); } for vec_file_entry in hash_map.values() { if vec_file_entry.len() > 1 { pre_checked_map.entry(*size).or_default().append(&mut vec_file_entry.clone()); } } } self.prehash_save_cache_at_exit(loaded_hash_map, &pre_hash_results); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); if check_was_stopped.load(Ordering::Relaxed) || check_if_stop_received(stop_receiver) { return false; } true } fn diff_loaded_and_prechecked_files( function_name: &str, used_map: BTreeMap>, loaded_hash_map: &BTreeMap>, records_already_cached: &mut BTreeMap>, non_cached_files_to_check: &mut BTreeMap>, ) { debug!("{function_name} - started diff between loaded and prechecked files"); for (size, mut vec_file_entry) in used_map { if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) { // TODO maybe hashmap is not needed when using < 4 elements let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new(); for file_entry in cached_vec_file_entry { cached_path_entries.insert(&file_entry.path, file_entry.clone()); } for file_entry in vec_file_entry { if let Some(cached_file_entry) = cached_path_entries.remove(file_entry.path.as_path()) { records_already_cached.entry(size).or_default().push(cached_file_entry); } else { non_cached_files_to_check.entry(size).or_default().push(file_entry); } } } else { non_cached_files_to_check.entry(size).or_default().append(&mut vec_file_entry); } } debug!( "{function_name} - completed diff between loaded and prechecked files - {}({}) non cached, {}({}) already cached", non_cached_files_to_check.len(), 
format_size(non_cached_files_to_check.values().map(|v| v.iter().map(|e| e.size).sum::()).sum::(), BINARY), records_already_cached.len(), format_size(records_already_cached.values().map(|v| v.iter().map(|e| e.size).sum::()).sum::(), BINARY), ); } #[fun_time(message = "full_hashing_load_cache_at_start", level = "debug")] fn full_hashing_load_cache_at_start( &mut self, mut pre_checked_map: BTreeMap>, ) -> (BTreeMap>, BTreeMap>, BTreeMap>) { let loaded_hash_map; let mut records_already_cached: BTreeMap> = Default::default(); let mut non_cached_files_to_check: BTreeMap> = Default::default(); if self.common_data.use_cache { debug!("full_hashing_load_cache_at_start - using cache"); let (messages, loaded_items) = load_cache_from_file_generalized_by_size::( &get_duplicate_cache_file(&self.get_params().hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map, ); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); Self::diff_loaded_and_prechecked_files( "full_hashing_load_cache_at_start", pre_checked_map, &loaded_hash_map, &mut records_already_cached, &mut non_cached_files_to_check, ); } else { debug!("full_hashing_load_cache_at_start - not using cache"); loaded_hash_map = Default::default(); mem::swap(&mut pre_checked_map, &mut non_cached_files_to_check); } (loaded_hash_map, records_already_cached, non_cached_files_to_check) } #[fun_time(message = "full_hashing_save_cache_at_exit", level = "debug")] fn full_hashing_save_cache_at_exit( &mut self, records_already_cached: BTreeMap>, full_hash_results: &mut Vec<(u64, BTreeMap>, Vec)>, loaded_hash_map: BTreeMap>, ) { if !self.common_data.use_cache { return; } 'main: for (size, vec_file_entry) in records_already_cached { // Check if size already exists, if exists we must to change it outside because cannot have mut and non mut reference to full_hash_results for (full_size, full_hashmap, _errors) in &mut (*full_hash_results) { if size == *full_size { for file_entry in vec_file_entry { full_hashmap.entry(file_entry.hash.clone()).or_default().push(file_entry); } continue 'main; } } // Size doesn't exists add results to files let mut temp_hashmap: BTreeMap> = Default::default(); for file_entry in vec_file_entry { temp_hashmap.entry(file_entry.hash.clone()).or_default().push(file_entry); } full_hash_results.push((size, temp_hashmap, Vec::new())); } // Must save all results to file, old loaded from file with all currently counted results let mut all_results: BTreeMap = Default::default(); for (_size, vec_file_entry) in loaded_hash_map { for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } } for (_size, hashmap, _errors) in full_hash_results { for vec_file_entry in hashmap.values() { for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone()); } } } let messages = save_cache_to_file_generalized( &get_duplicate_cache_file(&self.get_params().hash_type, false), &all_results, self.common_data.save_also_as_json, self.get_params().minimal_cache_file_size, ); self.get_text_messages_mut().extend_with_another_messages(messages); } #[fun_time(message = "full_hashing", level = "debug")] fn full_hashing(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>, pre_checked_map: BTreeMap>) -> bool { if pre_checked_map.is_empty() { return true; } let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = 
prepare_thread_handler_common(progress_sender, CurrentStage::DuplicateCacheLoading, 0, self.get_test_type()); let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.full_hashing_load_cache_at_start(pre_checked_map); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); if check_if_stop_received(stop_receiver) { return false; } let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common( progress_sender, CurrentStage::DuplicateFullHashing, non_cached_files_to_check.values().map(Vec::len).sum(), self.get_test_type(), ); let check_type = self.get_params().hash_type; debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::()); let mut full_hash_results: Vec<(u64, BTreeMap>, Vec)> = non_cached_files_to_check .into_par_iter() .map(|(size, vec_file_entry)| { let mut hashmap_with_hash: BTreeMap> = Default::default(); let mut errors: Vec = Vec::new(); let mut buffer = [0u8; 1024 * 16]; atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); for mut file_entry in vec_file_entry { if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } match hash_calculation(&mut buffer, &file_entry, check_type, u64::MAX) { Ok(hash_string) => { file_entry.hash = hash_string.clone(); hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry); } Err(s) => errors.push(s), } } Some((size, hashmap_with_hash, errors)) }) .while_some() .collect(); debug!("Finished full hashing"); // Even if clicked stop, save items to cache and show results send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = prepare_thread_handler_common(progress_sender, CurrentStage::DuplicateCacheSaving, 0, self.get_test_type()); self.full_hashing_save_cache_at_exit(records_already_cached, &mut full_hash_results, loaded_hash_map); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); for (size, hash_map, mut errors) in full_hash_results { self.common_data.text_messages.warnings.append(&mut errors); for (_hash, vec_file_entry) in hash_map { if vec_file_entry.len() > 1 { self.files_with_identical_hashes.entry(size).or_default().push(vec_file_entry); } } } true } #[fun_time(message = "hash_reference_folders", level = "debug")] fn hash_reference_folders(&mut self) { // Reference - only use in size, because later hash will be counted differently if self.common_data.use_reference_folders { let vec = mem::take(&mut self.files_with_identical_hashes) .into_iter() .filter_map(|(_size, vec_vec_file_entry)| { let mut all_results_with_same_size = Vec::new(); for vec_file_entry in vec_vec_file_entry { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { continue; } if let Some(file) = files_from_referenced_folders.pop() { all_results_with_same_size.push((file, normal_files)); } } if all_results_with_same_size.is_empty() { None } else { Some(all_results_with_same_size) } }) .collect::)>>>(); for vec_of_vec in vec { self.files_with_identical_hashes_referenced.insert(vec_of_vec[0].0.size, vec_of_vec); } } if self.common_data.use_reference_folders { for (size, vector_vectors) in 
&self.files_with_identical_hashes_referenced { for (_fe, vector) in vector_vectors { self.information.number_of_duplicated_files_by_hash += vector.len(); self.information.number_of_groups_by_hash += 1; self.information.lost_space_by_hash += (vector.len() as u64) * size; } } } else { for (size, vector_vectors) in &self.files_with_identical_hashes { for vector in vector_vectors { self.information.number_of_duplicated_files_by_hash += vector.len() - 1; self.information.number_of_groups_by_hash += 1; self.information.lost_space_by_hash += (vector.len() as u64 - 1) * size; } } } } #[fun_time(message = "check_files_hash", level = "debug")] fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { assert_eq!(self.get_params().check_method, CheckingMethod::Hash); let mut pre_checked_map: BTreeMap> = Default::default(); if !self.prehashing(stop_receiver, progress_sender, &mut pre_checked_map) { return false; } if !self.full_hashing(stop_receiver, progress_sender, pre_checked_map) { return false; } self.hash_reference_folders(); // Clean unused data self.files_with_identical_size = Default::default(); true } #[fun_time(message = "delete_files", level = "debug")] fn delete_files(&mut self) { if self.common_data.delete_method == DeleteMethod::None { return; } match self.get_params().check_method { CheckingMethod::Name => { let vec_files = self.files_with_identical_names.values().collect::>(); delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } CheckingMethod::SizeName => { let vec_files = self.files_with_identical_size_names.values().collect::>(); delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } CheckingMethod::Hash => { for vec_files in self.files_with_identical_hashes.values() { let vev: Vec<&Vec> = vec_files.iter().collect::>(); delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } } CheckingMethod::Size => { let vec_files = self.files_with_identical_size.values().collect::>(); delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } _ => panic!(), } } } impl DuplicateFinder { pub fn get_params(&self) -> &DuplicateFinderParameters { &self.params } pub const fn get_files_sorted_by_names(&self) -> &BTreeMap> { &self.files_with_identical_names } pub const fn get_files_sorted_by_size(&self) -> &BTreeMap> { &self.files_with_identical_size } pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec> { &self.files_with_identical_size_names } pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap>> { &self.files_with_identical_hashes } pub const fn get_information(&self) -> &Info { &self.information } pub fn set_dry_run(&mut self, dry_run: bool) { self.common_data.dry_run = dry_run; } pub fn get_use_reference(&self) -> bool { self.common_data.use_reference_folders } pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap)>> { &self.files_with_identical_hashes_referenced } pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap)> { &self.files_with_identical_names_referenced } pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap)> { &self.files_with_identical_size_referenced } pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (DuplicateEntry, Vec)> 
{ &self.files_with_identical_size_names_referenced } } impl DebugPrint for DuplicateFinder { fn debug_print(&self) { if !cfg!(debug_assertions) { return; } println!("---------------DEBUG PRINT---------------"); println!( "Number of duplicated files by size(in groups) - {} ({})", self.information.number_of_duplicated_files_by_size, self.information.number_of_groups_by_size ); println!( "Number of duplicated files by hash(in groups) - {} ({})", self.information.number_of_duplicated_files_by_hash, self.information.number_of_groups_by_hash ); println!( "Number of duplicated files by name(in groups) - {} ({})", self.information.number_of_duplicated_files_by_name, self.information.number_of_groups_by_name ); println!( "Lost space by size - {} ({} bytes)", format_size(self.information.lost_space_by_size, BINARY), self.information.lost_space_by_size ); println!( "Lost space by hash - {} ({} bytes)", format_size(self.information.lost_space_by_hash, BINARY), self.information.lost_space_by_hash ); println!("### Other"); println!("Files list size - {}", self.files_with_identical_size.len()); println!("Hashed Files list size - {}", self.files_with_identical_hashes.len()); println!("Files with identical names - {}", self.files_with_identical_names.len()); println!("Files with identical size names - {}", self.files_with_identical_size_names.len()); println!("Files with identical names referenced - {}", self.files_with_identical_names_referenced.len()); println!("Files with identical size names referenced - {}", self.files_with_identical_size_names_referenced.len()); println!("Files with identical size referenced - {}", self.files_with_identical_size_referenced.len()); println!("Files with identical hashes referenced - {}", self.files_with_identical_hashes_referenced.len()); println!("Checking Method - {:?}", self.get_params().check_method); self.debug_print_common(); println!("-----------------------------------------"); } } impl PrintResults for DuplicateFinder { fn write_results(&self, writer: &mut T) -> io::Result<()> { writeln!( writer, "Results of searching {:?} (reference directories {:?}) with excluded directories {:?} and excluded items {:?}", self.common_data.directories.included_directories, self.common_data.directories.reference_directories, self.common_data.directories.excluded_directories, self.common_data.excluded_items.get_excluded_items() )?; match self.get_params().check_method { CheckingMethod::Name => { if !self.files_with_identical_names.is_empty() { writeln!( writer, "-------------------------------------------------Files with same names-------------------------------------------------" )?; writeln!( writer, "Found {} files in {} groups with same name(may have different content)", self.information.number_of_duplicated_files_by_name, self.information.number_of_groups_by_name, )?; for (name, vector) in self.files_with_identical_names.iter().rev() { writeln!(writer, "Name - {} - {} files ", name, vector.len())?; for j in vector { writeln!(writer, "\"{}\"", j.path.to_string_lossy())?; } writeln!(writer)?; } } else if !self.files_with_identical_names_referenced.is_empty() { writeln!( writer, "-------------------------------------------------Files with same names in referenced folders-------------------------------------------------" )?; writeln!( writer, "Found {} files in {} groups with same name(may have different content)", self.information.number_of_duplicated_files_by_name, self.information.number_of_groups_by_name, )?; for (name, (file_entry, vector)) in 
self.files_with_identical_names_referenced.iter().rev() { writeln!(writer, "Name - {} - {} files ", name, vector.len())?; writeln!(writer, "Reference file - {:?}", file_entry.path)?; for j in vector { writeln!(writer, "\"{}\"", j.path.to_string_lossy())?; } writeln!(writer)?; } } else { write!(writer, "Not found any files with same names.")?; } } CheckingMethod::SizeName => { if !self.files_with_identical_names.is_empty() { writeln!( writer, "-------------------------------------------------Files with same size and names-------------------------------------------------" )?; writeln!( writer, "Found {} files in {} groups with same size and name(may have different content)", self.information.number_of_duplicated_files_by_size_name, self.information.number_of_groups_by_size_name, )?; for ((size, name), vector) in self.files_with_identical_size_names.iter().rev() { writeln!(writer, "Name - {}, {} - {} files ", name, format_size(*size, BINARY), vector.len())?; for j in vector { writeln!(writer, "\"{}\"", j.path.to_string_lossy())?; } writeln!(writer)?; } } else if !self.files_with_identical_names_referenced.is_empty() { writeln!( writer, "-------------------------------------------------Files with same size and names in referenced folders-------------------------------------------------" )?; writeln!( writer, "Found {} files in {} groups with same size and name(may have different content)", self.information.number_of_duplicated_files_by_size_name, self.information.number_of_groups_by_size_name, )?; for ((size, name), (file_entry, vector)) in self.files_with_identical_size_names_referenced.iter().rev() { writeln!(writer, "Name - {}, {} - {} files ", name, format_size(*size, BINARY), vector.len())?; writeln!(writer, "Reference file - {:?}", file_entry.path)?; for j in vector { writeln!(writer, "\"{}\"", j.path.to_string_lossy())?; } writeln!(writer)?; } } else { write!(writer, "Not found any files with same size and names.")?; } } CheckingMethod::Size => { if !self.files_with_identical_size.is_empty() { writeln!( writer, "-------------------------------------------------Files with same size-------------------------------------------------" )?; writeln!( writer, "Found {} duplicated files which in {} groups which takes {}.", self.information.number_of_duplicated_files_by_size, self.information.number_of_groups_by_size, format_size(self.information.lost_space_by_size, BINARY) )?; for (size, vector) in self.files_with_identical_size.iter().rev() { write!(writer, "\n---- Size {} ({}) - {} files \n", format_size(*size, BINARY), size, vector.len())?; for file_entry in vector { writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?; } } } else if !self.files_with_identical_size_referenced.is_empty() { writeln!( writer, "-------------------------------------------------Files with same size in referenced folders-------------------------------------------------" )?; writeln!( writer, "Found {} duplicated files which in {} groups which takes {}.", self.information.number_of_duplicated_files_by_size, self.information.number_of_groups_by_size, format_size(self.information.lost_space_by_size, BINARY) )?; for (size, (file_entry, vector)) in self.files_with_identical_size_referenced.iter().rev() { writeln!(writer, "\n---- Size {} ({}) - {} files", format_size(*size, BINARY), size, vector.len())?; writeln!(writer, "Reference file - {:?}", file_entry.path)?; for file_entry in vector { writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?; } } } else { write!(writer, "Not found any duplicates.")?; 
} } CheckingMethod::Hash => { if !self.files_with_identical_hashes.is_empty() { writeln!( writer, "-------------------------------------------------Files with same hashes-------------------------------------------------" )?; writeln!( writer, "Found {} duplicated files which in {} groups which takes {}.", self.information.number_of_duplicated_files_by_hash, self.information.number_of_groups_by_hash, format_size(self.information.lost_space_by_hash, BINARY) )?; for (size, vectors_vector) in self.files_with_identical_hashes.iter().rev() { for vector in vectors_vector { writeln!(writer, "\n---- Size {} ({}) - {} files", format_size(*size, BINARY), size, vector.len())?; for file_entry in vector { writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?; } } } } else if !self.files_with_identical_hashes_referenced.is_empty() { writeln!( writer, "-------------------------------------------------Files with same hashes in referenced folders-------------------------------------------------" )?; writeln!( writer, "Found {} duplicated files which in {} groups which takes {}.", self.information.number_of_duplicated_files_by_hash, self.information.number_of_groups_by_hash, format_size(self.information.lost_space_by_hash, BINARY) )?; for (size, vectors_vector) in self.files_with_identical_hashes_referenced.iter().rev() { for (file_entry, vector) in vectors_vector { writeln!(writer, "\n---- Size {} ({}) - {} files", format_size(*size, BINARY), size, vector.len())?; writeln!(writer, "Reference file - \"{}\"", file_entry.path.to_string_lossy())?; for file_entry in vector { writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?; } } } } else { write!(writer, "Not found any duplicates.")?; } } _ => panic!(), } Ok(()) } // TODO - check if is possible to save also data in header about size and name in SizeName mode - https://github.com/qarmin/czkawka/issues/1137 fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> io::Result<()> { if self.get_use_reference() { match self.get_params().check_method { CheckingMethod::Name => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_names_referenced, pretty_print), CheckingMethod::SizeName => { self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_size_names_referenced.values().collect::>(), pretty_print) } CheckingMethod::Size => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_size_referenced, pretty_print), CheckingMethod::Hash => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_hashes_referenced, pretty_print), _ => panic!(), } } else { match self.get_params().check_method { CheckingMethod::Name => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_names, pretty_print), CheckingMethod::SizeName => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_size_names.values().collect::>(), pretty_print), CheckingMethod::Size => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_size, pretty_print), CheckingMethod::Hash => self.save_results_to_file_as_json_internal(file_name, &self.files_with_identical_hashes, pretty_print), _ => panic!(), } } } } #[cfg(target_family = "windows")] fn filter_hard_links(vec_file_entry: &[FileEntry]) -> Vec { let mut inodes: HashSet = HashSet::with_capacity(vec_file_entry.len()); let mut identical: Vec = Vec::with_capacity(vec_file_entry.len()); for f in vec_file_entry { if let Ok(meta) = 
file_id::get_low_res_file_id(&f.path) { if let file_id::FileId::HighRes { file_id, .. } = meta { if !inodes.insert(file_id) { continue; } } } identical.push(f.clone()); } identical } #[cfg(target_family = "unix")] fn filter_hard_links(vec_file_entry: &[FileEntry]) -> Vec { let mut inodes: HashSet = HashSet::with_capacity(vec_file_entry.len()); let mut identical: Vec = Vec::with_capacity(vec_file_entry.len()); for f in vec_file_entry { if let Ok(meta) = fs::metadata(&f.path) { if !inodes.insert(meta.ino()) { continue; } } identical.push(f.clone()); } identical } pub fn make_hard_link(src: &Path, dst: &Path) -> io::Result<()> { let dst_dir = dst.parent().ok_or_else(|| Error::new(ErrorKind::Other, "No parent"))?; let temp = dst_dir.join(TEMP_HARDLINK_FILE); fs::rename(dst, temp.as_path())?; let result = fs::hard_link(src, dst); if result.is_err() { fs::rename(temp.as_path(), dst)?; } fs::remove_file(temp)?; result } pub trait MyHasher { fn update(&mut self, bytes: &[u8]); fn finalize(&self) -> String; } fn hash_calculation(buffer: &mut [u8], file_entry: &DuplicateEntry, hash_type: HashType, limit: u64) -> Result { let mut file_handler = match File::open(&file_entry.path) { Ok(t) => t, Err(e) => return Err(format!("Unable to check hash of file {:?}, reason {e}", file_entry.path)), }; let hasher = &mut *hash_type.hasher(); let mut current_file_read_bytes: u64 = 0; loop { let n = match file_handler.read(buffer) { Ok(0) => break, Ok(t) => t, Err(e) => return Err(format!("Error happened when checking hash of file {:?}, reason {}", file_entry.path, e)), }; current_file_read_bytes += n as u64; hasher.update(&buffer[..n]); if current_file_read_bytes >= limit { break; } } Ok(hasher.finalize()) } impl MyHasher for blake3::Hasher { fn update(&mut self, bytes: &[u8]) { self.update(bytes); } fn finalize(&self) -> String { self.finalize().to_hex().to_string() } } impl MyHasher for crc32fast::Hasher { fn update(&mut self, bytes: &[u8]) { self.write(bytes); } fn finalize(&self) -> String { self.finish().to_string() } } impl MyHasher for Xxh3 { fn update(&mut self, bytes: &[u8]) { self.write(bytes); } fn finalize(&self) -> String { self.finish().to_string() } } impl CommonData for DuplicateFinder { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } fn get_check_method(&self) -> CheckingMethod { self.get_params().check_method } } #[cfg(test)] mod tests { use std::fs::{read_dir, File, Metadata}; use std::io; #[cfg(target_family = "windows")] use std::os::fs::MetadataExt; #[cfg(target_family = "unix")] use std::os::unix::fs::MetadataExt; use std::path::PathBuf; use super::*; #[cfg(target_family = "unix")] fn assert_inode(before: &Metadata, after: &Metadata) { assert_eq!(before.ino(), after.ino()); } #[cfg(target_family = "windows")] fn assert_inode(_: &Metadata, _: &Metadata) {} #[test] fn test_make_hard_link() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, dst) = (dir.path().join("a"), dir.path().join("b")); File::create(&src)?; let metadata = fs::metadata(&src)?; File::create(&dst)?; make_hard_link(&src, &dst)?; assert_inode(&metadata, &fs::metadata(&dst)?); assert_eq!(metadata.permissions(), fs::metadata(&dst)?.permissions()); assert_eq!(metadata.modified()?, fs::metadata(&dst)?.modified()?); assert_inode(&metadata, &fs::metadata(&src)?); assert_eq!(metadata.permissions(), fs::metadata(&src)?.permissions()); assert_eq!(metadata.modified()?, fs::metadata(&src)?.modified()?); let mut actual = 
read_dir(&dir)?.flatten().map(|e| e.path()).collect::>(); actual.sort_unstable(); assert_eq!(vec![src, dst], actual); Ok(()) } #[test] fn test_make_hard_link_fails() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, dst) = (dir.path().join("a"), dir.path().join("b")); File::create(&dst)?; let metadata = fs::metadata(&dst)?; assert!(make_hard_link(&src, &dst).is_err()); assert_inode(&metadata, &fs::metadata(&dst)?); assert_eq!(metadata.permissions(), fs::metadata(&dst)?.permissions()); assert_eq!(metadata.modified()?, fs::metadata(&dst)?.modified()?); assert_eq!(vec![dst], read_dir(&dir)?.flatten().map(|e| e.path()).collect::>()); Ok(()) } #[test] fn test_filter_hard_links_empty() { let expected: Vec = Default::default(); assert_eq!(expected, filter_hard_links(&[])); } #[cfg(target_family = "unix")] #[test] fn test_filter_hard_links() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, dst) = (dir.path().join("a"), dir.path().join("b")); File::create(&src)?; fs::hard_link(src.clone(), dst.clone())?; let e1 = FileEntry { path: src, ..Default::default() }; let e2 = FileEntry { path: dst, ..Default::default() }; let actual = filter_hard_links(&[e1.clone(), e2]); assert_eq!(vec![e1], actual); Ok(()) } #[test] fn test_filter_hard_links_regular_files() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let (src, dst) = (dir.path().join("a"), dir.path().join("b")); File::create(&src)?; File::create(&dst)?; let e1 = FileEntry { path: src, ..Default::default() }; let e2 = FileEntry { path: dst, ..Default::default() }; let actual = filter_hard_links(&[e1.clone(), e2.clone()]); assert_eq!(vec![e1, e2], actual); Ok(()) } #[test] fn test_hash_calculation() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let mut buf = [0u8; 1 << 10]; let src = dir.path().join("a"); let mut file = File::create(&src)?; file.write_all(b"aa")?; let e = DuplicateEntry { path: src, ..Default::default() }; let r = hash_calculation(&mut buf, &e, HashType::Blake3, 0).expect("hash_calculation failed"); assert!(!r.is_empty()); Ok(()) } #[test] fn test_hash_calculation_limit() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let mut buf = [0u8; 1]; let src = dir.path().join("a"); let mut file = File::create(&src)?; file.write_all(b"aa")?; let e = DuplicateEntry { path: src, ..Default::default() }; let r1 = hash_calculation(&mut buf, &e, HashType::Blake3, 1).expect("hash_calculation failed"); let r2 = hash_calculation(&mut buf, &e, HashType::Blake3, 2).expect("hash_calculation failed"); let r3 = hash_calculation(&mut buf, &e, HashType::Blake3, u64::MAX).expect("hash_calculation failed"); assert_ne!(r1, r2); assert_eq!(r2, r3); Ok(()) } #[test] fn test_hash_calculation_invalid_file() -> io::Result<()> { let dir = tempfile::Builder::new().tempdir()?; let mut buf = [0u8; 1 << 10]; let src = dir.path().join("a"); let e = DuplicateEntry { path: src, ..Default::default() }; let r = hash_calculation(&mut buf, &e, HashType::Blake3, 0).expect_err("hash_calculation succeeded"); assert!(!r.is_empty()); Ok(()) } } czkawka_core-8.0.0/src/empty_files.rs000064400000000000000000000112661046102023000157570ustar 00000000000000use std::fs; use std::io::prelude::*; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use log::debug; use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; use 
#[derive(Default)]
pub struct Info {
    pub number_of_empty_files: usize,
}

pub struct EmptyFiles {
    common_data: CommonToolData,
    information: Info,
    empty_files: Vec<FileEntry>,
}

impl CommonData for EmptyFiles {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
}

impl EmptyFiles {
    pub fn new() -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::EmptyFiles),
            information: Info::default(),
            empty_files: vec![],
        }
    }

    #[fun_time(message = "find_empty_files", level = "info")]
    pub fn find_empty_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        if !self.check_files(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        self.delete_files();
        self.debug_print();
    }

    #[fun_time(message = "check_files", level = "debug")]
    fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        let result = DirTraversalBuilder::new()
            .common_data(&self.common_data)
            .group_by(|_fe| ())
            .stop_receiver(stop_receiver)
            .progress_sender(progress_sender)
            // Restricting the traversal to files whose size is in 0..=0 yields exactly the empty files
            .minimal_file_size(0)
            .maximal_file_size(0)
            .build()
            .run();

        match result {
            DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                self.empty_files = grouped_file_entries.into_values().flatten().collect();
                self.information.number_of_empty_files = self.empty_files.len();
                self.common_data.text_messages.warnings.extend(warnings);
                debug!("Found {} empty files.", self.information.number_of_empty_files);
                true
            }
            DirTraversalResult::Stopped => false,
        }
    }

    #[fun_time(message = "delete_files", level = "debug")]
    fn delete_files(&mut self) {
        match self.common_data.delete_method {
            DeleteMethod::Delete => {
                for file_entry in &self.empty_files {
                    if fs::remove_file(&file_entry.path).is_err() {
                        self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string());
                    }
                }
            }
            DeleteMethod::None => {
                // Just do nothing
            }
            _ => {
                unreachable!()
            }
        }
    }
}

impl Default for EmptyFiles {
    fn default() -> Self {
        Self::new()
    }
}

impl DebugPrint for EmptyFiles {
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }
        println!("---------------DEBUG PRINT---------------");
        println!("Empty list size - {}", self.empty_files.len());
        self.debug_print_common();
        println!("-----------------------------------------");
    }
}

impl PrintResults for EmptyFiles {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        writeln!(
            writer,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.common_data.directories.included_directories,
            self.common_data.directories.excluded_directories,
            self.common_data.excluded_items.get_excluded_items()
        )?;

        if !self.empty_files.is_empty() {
            writeln!(writer, "Found {} empty files.", self.information.number_of_empty_files)?;
            for file_entry in &self.empty_files {
                writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?;
            }
        } else {
            write!(writer, "Not found any empty files.")?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        self.save_results_to_file_as_json_internal(file_name, &self.empty_files, pretty_print)
    }
}

impl EmptyFiles {
    pub const fn get_empty_files(&self) -> &Vec<FileEntry> {
        &self.empty_files
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }
}
czkawka_core-8.0.0/src/empty_folder.rs000064400000000000000000000326601046102023000161310ustar 00000000000000
use std::collections::HashMap;
use std::fs;
use std::fs::DirEntry;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;

use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use log::debug;
use rayon::prelude::*;

use crate::common::{check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_dir_traversal::{common_get_entry_data, common_get_metadata_dir, common_read_dir, get_modified_time, ToolType};
use crate::common_directory::Directories;
use crate::common_items::ExcludedItems;
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults};
use crate::progress_data::{CurrentStage, ProgressData};

#[derive(Clone, Debug)]
pub struct FolderEntry {
    pub path: PathBuf,
    pub(crate) parent_path: Option<String>, // Usable only when finding
    pub(crate) is_empty: FolderEmptiness,
    pub modified_date: u64,
}

impl FolderEntry {
    pub fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
}

pub struct EmptyFolder {
    common_data: CommonToolData,
    information: Info,
    empty_folder_list: HashMap<String, FolderEntry>, // Key is the folder path
}

/// Describes whether a folder is empty.
/// `optimize_folders` automatically promotes "Maybe" to "Yes", so a dedicated "Yes" variant is not needed here.
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
pub(crate) enum FolderEmptiness {
    No,
    Maybe,
}

#[derive(Default)]
pub struct Info {
    pub number_of_empty_folders: usize,
}

impl EmptyFolder {
    pub fn new() -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::EmptyFolders),
            information: Default::default(),
            empty_folder_list: Default::default(),
        }
    }

    pub const fn get_empty_folder_list(&self) -> &HashMap<String, FolderEntry> {
        &self.empty_folder_list
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }

    #[fun_time(message = "find_empty_folders", level = "info")]
    pub fn find_empty_folders(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        if !self.check_for_empty_folders(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        self.optimize_folders();

        self.delete_files();
        self.debug_print();
    }

    fn optimize_folders(&mut self) {
        let mut new_directory_folders: HashMap<String, FolderEntry> = Default::default();
        for (name, folder_entry) in &self.empty_folder_list {
            match &folder_entry.parent_path {
                Some(t) => {
                    if !self.empty_folder_list.contains_key(t) {
                        new_directory_folders.insert(name.clone(), folder_entry.clone());
                    }
                }
                None => {
                    new_directory_folders.insert(name.clone(), folder_entry.clone());
                }
            }
        }
        self.empty_folder_list = new_directory_folders;
        self.information.number_of_empty_folders = self.empty_folder_list.len();
    }
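
    // Editor's note (illustrative walkthrough, not upstream text): every discovered folder
    // starts as `FolderEmptiness::Maybe`. Any file, excluded entry or unreadable child marks
    // the containing folder as non-empty, and `set_as_not_empty_folder` further below then
    // walks the `parent_path` chain upwards, e.g. a file found in `/a/b/c` flips `c`, `b`
    // and `a` to `FolderEmptiness::No`. Whatever is still `Maybe` at the end is reported as empty.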
    #[fun_time(message = "check_for_empty_folders", level = "debug")]
    fn check_for_empty_folders(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();

        let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::CollectingFiles, 0, self.get_test_type());

        let excluded_items = self.common_data.excluded_items.clone();
        let directories = self.common_data.directories.clone();

        let mut non_empty_folders: Vec<String> = vec![];
        let mut start_folder_entries = Vec::with_capacity(folders_to_check.len());
        let mut new_folder_entries_list = Vec::new();
        for dir in &folders_to_check {
            start_folder_entries.push(FolderEntry {
                path: dir.clone(),
                parent_path: None,
                is_empty: FolderEmptiness::Maybe,
                modified_date: 0,
            });
        }

        while !folders_to_check.is_empty() {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            let segments: Vec<_> = folders_to_check
                .into_par_iter()
                .map(|current_folder| {
                    let mut dir_result = vec![];
                    let mut warnings = vec![];
                    let mut non_empty_folder = None;
                    let mut folder_entries_list = vec![];

                    let current_folder_as_string = current_folder.to_string_lossy().to_string();
                    let Some(read_dir) = common_read_dir(&current_folder, &mut warnings) else {
                        return (dir_result, warnings, Some(current_folder_as_string), folder_entries_list);
                    };

                    let mut counter = 0;
                    // Check every sub folder/file/link etc.
                    for entry in read_dir {
                        let Some(entry_data) = common_get_entry_data(&entry, &mut warnings, &current_folder) else {
                            continue;
                        };
                        let Ok(file_type) = entry_data.file_type() else { continue };

                        if file_type.is_dir() {
                            counter += 1;
                            Self::process_dir_in_dir_mode(
                                &current_folder,
                                &current_folder_as_string,
                                entry_data,
                                &directories,
                                &mut dir_result,
                                &mut warnings,
                                &excluded_items,
                                &mut non_empty_folder,
                                &mut folder_entries_list,
                            );
                        } else {
                            if non_empty_folder.is_none() {
                                non_empty_folder = Some(current_folder_as_string.clone());
                            }
                        }
                    }
                    if counter > 0 {
                        // Update the atomic counter in one batch - many small atomic additions would be slow
                        atomic_counter.fetch_add(counter, Ordering::Relaxed);
                    }
                    (dir_result, warnings, non_empty_folder, folder_entries_list)
                })
                .collect();

            let required_size = segments.iter().map(|(segment, _, _, _)| segment.len()).sum::<usize>();
            folders_to_check = Vec::with_capacity(required_size);

            // Process collected data
            for (segment, warnings, non_empty_folder, fe_list) in segments {
                folders_to_check.extend(segment);
                if !warnings.is_empty() {
                    self.common_data.text_messages.warnings.extend(warnings);
                }
                if let Some(non_empty_folder) = non_empty_folder {
                    non_empty_folders.push(non_empty_folder);
                }
                new_folder_entries_list.push(fe_list);
            }
        }

        let mut folder_entries: HashMap<String, FolderEntry> =
            HashMap::with_capacity(start_folder_entries.len() + new_folder_entries_list.iter().map(Vec::len).sum::<usize>());
        for fe in start_folder_entries {
            folder_entries.insert(fe.path.to_string_lossy().to_string(), fe);
        }
        for fe_list in new_folder_entries_list {
            for fe in fe_list {
                folder_entries.insert(fe.path.to_string_lossy().to_string(), fe);
            }
        }

        // Propagate non-emptiness upwards, starting from the deepest collected folders
        for current_folder in non_empty_folders.into_iter().rev() {
            Self::set_as_not_empty_folder(&mut folder_entries, &current_folder);
        }

        for (name, folder_entry) in folder_entries {
            if folder_entry.is_empty != FolderEmptiness::No {
                self.empty_folder_list.insert(name, folder_entry);
            }
        }
        debug!("Found {} empty folders.", self.empty_folder_list.len());
        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
        true
    }
    pub(crate) fn set_as_not_empty_folder(folder_entries: &mut HashMap<String, FolderEntry>, current_folder: &str) {
        let mut d = folder_entries
            .get_mut(current_folder)
            .unwrap_or_else(|| panic!("Folder {current_folder} not found in folder_entries"));
        if d.is_empty == FolderEmptiness::No {
            return; // Already set as non empty by one of its children
        }
        // Recursively mark this folder and all of its parent folders as non empty
        loop {
            d.is_empty = FolderEmptiness::No;
            if let Some(parent_path) = &d.parent_path {
                let cf = parent_path.clone();
                d = folder_entries.get_mut(&cf).unwrap_or_else(|| panic!("Folder {cf} not found in folder_entries"));
                if d.is_empty == FolderEmptiness::No {
                    break; // Already set as non empty, so one of its children already marked it
                }
            } else {
                break;
            }
        }
    }

    fn process_dir_in_dir_mode(
        current_folder: &Path,
        current_folder_as_str: &str,
        entry_data: &DirEntry,
        directories: &Directories,
        dir_result: &mut Vec<PathBuf>,
        warnings: &mut Vec<String>,
        excluded_items: &ExcludedItems,
        non_empty_folder: &mut Option<String>,
        folder_entries_list: &mut Vec<FolderEntry>,
    ) {
        let next_folder = entry_data.path();
        if excluded_items.is_excluded(&next_folder) || directories.is_excluded(&next_folder) {
            if non_empty_folder.is_none() {
                *non_empty_folder = Some(current_folder_as_str.to_string());
            }
            return;
        }

        #[cfg(target_family = "unix")]
        if directories.exclude_other_filesystems() {
            match directories.is_on_other_filesystems(&next_folder) {
                Ok(true) => return,
                Err(e) => warnings.push(e),
                _ => (),
            }
        }

        let Some(metadata) = common_get_metadata_dir(entry_data, warnings, &next_folder) else {
            if non_empty_folder.is_none() {
                *non_empty_folder = Some(current_folder_as_str.to_string());
            }
            return;
        };

        dir_result.push(next_folder.clone());
        folder_entries_list.push(FolderEntry {
            path: next_folder,
            parent_path: Some(current_folder_as_str.to_string()),
            is_empty: FolderEmptiness::Maybe,
            modified_date: get_modified_time(&metadata, warnings, current_folder, true),
        });
    }

    #[fun_time(message = "delete_files", level = "debug")]
    fn delete_files(&mut self) {
        if self.get_delete_method() == DeleteMethod::None {
            return;
        }
        let folders_to_remove = self.empty_folder_list.keys().collect::<Vec<_>>();

        let errors: Vec<_> = folders_to_remove
            .into_par_iter()
            .filter_map(|name| {
                if let Err(e) = fs::remove_dir_all(name) {
                    Some(format!("Failed to remove folder {name:?}, reason {e}"))
                } else {
                    None
                }
            })
            .collect();
        self.get_text_messages_mut().errors.extend(errors);
    }
}

impl Default for EmptyFolder {
    fn default() -> Self {
        Self::new()
    }
}

impl DebugPrint for EmptyFolder {
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }
        println!("---------------DEBUG PRINT---------------");
        println!("Number of empty folders - {}", self.information.number_of_empty_folders);
        self.debug_print_common();
        println!("-----------------------------------------");
    }
}

impl PrintResults for EmptyFolder {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        if !self.empty_folder_list.is_empty() {
            writeln!(writer, "--------------------------Empty folder list--------------------------")?;
            writeln!(writer, "Found {} empty folders", self.information.number_of_empty_folders)?;
            let mut empty_folder_list = self.empty_folder_list.keys().collect::<Vec<_>>();
            empty_folder_list.par_sort_unstable();
            for name in empty_folder_list {
                writeln!(writer, "{name}")?;
            }
        } else {
            write!(writer, "Not found any empty folders.")?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        self.save_results_to_file_as_json_internal(file_name, &self.empty_folder_list.keys().collect::<Vec<_>>(), pretty_print)
    }
}

impl CommonData for EmptyFolder {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
}
czkawka_core-8.0.0/src/invalid_symlinks.rs000064400000000000000000000171401046102023000170130ustar 00000000000000
use std::fs;
use std::io::prelude::*;
use std::path::{Path, PathBuf};

use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use log::debug;
use serde::{Deserialize, Serialize};

use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, ErrorType, FileEntry, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
use crate::progress_data::ProgressData;

#[derive(Default)]
pub struct Info {
    pub number_of_invalid_symlinks: usize,
}

const MAX_NUMBER_OF_SYMLINK_JUMPS: i32 = 20;

#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct SymlinkInfo {
    pub destination_path: PathBuf,
    pub type_of_error: ErrorType,
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SymlinksFileEntry {
    pub path: PathBuf,
    pub size: u64,
    pub modified_date: u64,
    pub symlink_info: SymlinkInfo,
}

impl ResultEntry for SymlinksFileEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

impl FileEntry {
    fn into_symlinks_entry(self, symlink_info: SymlinkInfo) -> SymlinksFileEntry {
        SymlinksFileEntry {
            size: self.size,
            path: self.path,
            modified_date: self.modified_date,
            symlink_info,
        }
    }
}

pub struct InvalidSymlinks {
    common_data: CommonToolData,
    information: Info,
    invalid_symlinks: Vec<SymlinksFileEntry>,
}

impl InvalidSymlinks {
    pub fn new() -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::InvalidSymlinks),
            information: Info::default(),
            invalid_symlinks: vec![],
        }
    }

    #[fun_time(message = "find_invalid_links", level = "info")]
    pub fn find_invalid_links(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        if !self.check_files(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        self.delete_files();
        self.debug_print();
    }

    #[fun_time(message = "check_files", level = "debug")]
    fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        let result = DirTraversalBuilder::new()
            .common_data(&self.common_data)
            .group_by(|_fe| ())
            .stop_receiver(stop_receiver)
            .progress_sender(progress_sender)
            .collect(Collect::InvalidSymlinks)
            .build()
            .run();

        match result {
            DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                self.invalid_symlinks = grouped_file_entries
                    .into_values()
                    .flatten()
                    .filter_map(|e| {
                        let (destination_path, type_of_error) = Self::check_invalid_symlinks(&e.path)?;
                        Some(e.into_symlinks_entry(SymlinkInfo { destination_path, type_of_error }))
                    })
                    .collect();
                self.information.number_of_invalid_symlinks = self.invalid_symlinks.len();
                self.common_data.text_messages.warnings.extend(warnings);
                debug!("Found {} invalid symlinks.", self.information.number_of_invalid_symlinks);
                true
            }
            DirTraversalResult::Stopped => false,
        }
    }

    fn check_invalid_symlinks(current_file_name: &Path) -> Option<(PathBuf, ErrorType)> {
        let mut destination_path = PathBuf::new();
        let type_of_error;

        match current_file_name.read_link() {
            Ok(t) => {
                destination_path.push(t);
                let mut number_of_loop = 0;
                let mut current_path = current_file_name.to_path_buf();
                loop {
                    if number_of_loop == 0 && !current_path.exists() {
                        type_of_error = ErrorType::NonExistentFile;
                        break;
                    }
                    if number_of_loop == MAX_NUMBER_OF_SYMLINK_JUMPS {
                        type_of_error = ErrorType::InfiniteRecursion;
                        break;
                    }

                    current_path = match current_path.read_link() {
                        Ok(t) => t,
                        Err(_inspected) => {
                            // Looks like one of the next symlinks in the chain is broken, but we do nothing with it - TODO find out why it is broken
                            return None;
                        }
                    };

                    number_of_loop += 1;
                }
            }
            Err(_inspected) => {
                // Failed to load info about it
                type_of_error = ErrorType::NonExistentFile;
            }
        }
        Some((destination_path, type_of_error))
    }
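
    // Editor's note: illustrative sketch, not part of upstream czkawka. Under the logic above,
    // a symlink whose target never existed is classified on the very first loop iteration as
    // `ErrorType::NonExistentFile`.
    #[cfg(all(test, target_family = "unix"))]
    #[allow(dead_code)]
    fn example_classify_dangling_symlink(dir: &Path) -> Option<(PathBuf, ErrorType)> {
        let link = dir.join("dangling_link");
        // The target path intentionally does not exist.
        std::os::unix::fs::symlink(dir.join("missing_target"), &link).ok()?;
        Self::check_invalid_symlinks(&link)
    }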
    #[fun_time(message = "delete_files", level = "debug")]
    fn delete_files(&mut self) {
        match self.common_data.delete_method {
            DeleteMethod::Delete => {
                for file_entry in &self.invalid_symlinks {
                    if fs::remove_file(&file_entry.path).is_err() {
                        self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string());
                    }
                }
            }
            DeleteMethod::None => {
                // Just do nothing
            }
            _ => unreachable!(),
        }
    }
}

impl Default for InvalidSymlinks {
    fn default() -> Self {
        Self::new()
    }
}

impl DebugPrint for InvalidSymlinks {
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }
        println!("---------------DEBUG PRINT---------------");
        println!("Invalid symlinks list size - {}", self.invalid_symlinks.len());
        self.debug_print_common();
        println!("-----------------------------------------");
    }
}

impl PrintResults for InvalidSymlinks {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        if !self.invalid_symlinks.is_empty() {
            writeln!(writer, "Found {} invalid symlinks.", self.information.number_of_invalid_symlinks)?;
            for file_entry in &self.invalid_symlinks {
                writeln!(
                    writer,
                    "\"{}\"\t\t\"{}\"\t\t{}",
                    file_entry.path.to_string_lossy(),
                    file_entry.symlink_info.destination_path.to_string_lossy(),
                    match file_entry.symlink_info.type_of_error {
                        ErrorType::InfiniteRecursion => "Infinite Recursion",
                        ErrorType::NonExistentFile => "Non Existent File",
                    }
                )?;
            }
        } else {
            write!(writer, "Not found any invalid symlinks.")?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        self.save_results_to_file_as_json_internal(file_name, &self.invalid_symlinks, pretty_print)
    }
}

impl CommonData for InvalidSymlinks {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
}

impl InvalidSymlinks {
    pub const fn get_invalid_symlinks(&self) -> &Vec<SymlinksFileEntry> {
        &self.invalid_symlinks
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }
}
czkawka_core-8.0.0/src/lib.rs000064400000000000000000000014411046102023000141770ustar 00000000000000
#![allow(clippy::collapsible_else_if)]
#![allow(clippy::type_complexity)]
#![allow(clippy::needless_late_init)]
#![allow(clippy::too_many_arguments)]
#![warn(clippy::unwrap_used)]

#[macro_use]
extern crate bitflags;

pub mod big_file;
pub mod broken_files;
pub mod duplicate;
pub mod empty_files;
pub mod empty_folder;
pub mod invalid_symlinks;
pub mod same_music;
pub mod similar_images;
pub mod similar_videos;
pub mod temporary;

pub mod bad_extensions;
pub mod common;
pub mod common_cache;
pub mod common_dir_traversal;
pub mod common_directory;
pub mod common_extensions;
pub mod common_image;
pub mod common_items;
pub mod common_messages;
pub mod common_tool;
pub mod common_traits;
pub mod localizer_core;
pub mod progress_data;

pub const CZKAWKA_VERSION: &str = env!("CARGO_PKG_VERSION");
czkawka_core-8.0.0/src/localizer_core.rs000064400000000000000000000027351046102023000164320ustar 00000000000000
use std::collections::HashMap;

use i18n_embed::fluent::{fluent_language_loader, FluentLanguageLoader};
use i18n_embed::{DefaultLocalizer, LanguageLoader, Localizer};
use once_cell::sync::Lazy;
use rust_embed::RustEmbed;

#[derive(RustEmbed)]
#[folder = "i18n/"]
struct Localizations;

pub static LANGUAGE_LOADER_CORE: Lazy<FluentLanguageLoader> = Lazy::new(|| {
    let loader: FluentLanguageLoader = fluent_language_loader!();
    loader.load_fallback_language(&Localizations).expect("Error while loading fallback language");
    loader
});
flc { ($message_id:literal) => {{ i18n_embed_fl::fl!($crate::localizer_core::LANGUAGE_LOADER_CORE, $message_id) }}; ($message_id:literal, $($args:expr),*) => {{ i18n_embed_fl::fl!($crate::localizer_core::LANGUAGE_LOADER_CORE, $message_id, $($args), *) }}; } // Get the `Localizer` to be used for localizing this library. pub fn localizer_core() -> Box { Box::from(DefaultLocalizer::new(&*LANGUAGE_LOADER_CORE, &Localizations)) } pub fn generate_translation_hashmap(vec: Vec<(&'static str, String)>) -> HashMap<&'static str, String> { let mut hashmap: HashMap<&'static str, String> = Default::default(); for (key, value) in vec { hashmap.insert(key, value); } hashmap } pub fn fnc_get_similarity_very_high() -> String { flc!("core_similarity_very_high") } pub fn fnc_get_similarity_minimal() -> String { flc!("core_similarity_minimal") } czkawka_core-8.0.0/src/progress_data.rs000064400000000000000000000206631046102023000162750ustar 00000000000000use crate::common_dir_traversal::{CheckingMethod, ToolType}; // Empty files // 0 - Collecting files // Empty folders // 0 - Collecting folders // Big files // 0 - Collecting files // Same music // 0 - Collecting files // 1 - Loading cache // 2 - Checking tags // 3 - Saving cache // 4 - TAGS - Comparing tags // 4 - CONTENT - Loading cache // 5 - CONTENT - Calculating fingerprints // 6 - CONTENT - Saving cache // 7 - CONTENT - Comparing fingerprints // Similar images // 0 - Collecting files // 1 - Scanning images // 2 - Comparing hashes // Similar videos // 0 - Collecting files // 1 - Scanning videos // Temporary files // 0 - Collecting files // Invalid symlinks // 0 - Collecting files // Broken files // 0 - Collecting files // 1 - Scanning files // Bad extensions // 0 - Collecting files // 1 - Scanning files // Duplicates - Hash // 0 - Collecting files // 1 - Loading cache // 2 - Hash - first 1KB file // 3 - Saving cache // 4 - Loading cache // 5 - Hash - normal hash // 6 - Saving cache // Duplicates - Name or SizeName or Size // 0 - Collecting files #[derive(Debug)] pub struct ProgressData { pub sstage: CurrentStage, pub checking_method: CheckingMethod, pub current_stage_idx: u8, pub max_stage_idx: u8, pub entries_checked: usize, pub entries_to_check: usize, pub tool_type: ToolType, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum CurrentStage { CollectingFiles, DuplicateCacheSaving, DuplicateCacheLoading, DuplicatePreHashCacheSaving, DuplicatePreHashCacheLoading, DuplicateScanningName, DuplicateScanningSizeName, DuplicateScanningSize, DuplicatePreHashing, DuplicateFullHashing, SameMusicCacheSavingTags, SameMusicCacheLoadingTags, SameMusicCacheSavingFingerprints, SameMusicCacheLoadingFingerprints, SameMusicReadingTags, SameMusicCalculatingFingerprints, SameMusicComparingTags, SameMusicComparingFingerprints, SimilarImagesCalculatingHashes, SimilarImagesComparingHashes, SimilarVideosCalculatingHashes, BrokenFilesChecking, BadExtensionsChecking, } impl ProgressData { // TODO change validations to debug_asserts // Currently are too flaky to run asserts in normal builds pub fn validate(&self) { assert!( self.current_stage_idx <= self.max_stage_idx, "Current stage index: {}, max stage index: {}, stage {:?}", self.current_stage_idx, self.max_stage_idx, self.sstage ); assert_eq!( self.max_stage_idx, self.tool_type.get_max_stage(self.checking_method), "Max stage index: {}, tool type: {:?}, checking method: {:?}", self.max_stage_idx, self.tool_type, self.checking_method ); // TODO not sure about other types // may need to be changed if self.sstage != 
impl ProgressData {
    // TODO change validations to debug_asserts - currently they are too flaky to run as hard asserts in normal builds
    pub fn validate(&self) {
        assert!(
            self.current_stage_idx <= self.max_stage_idx,
            "Current stage index: {}, max stage index: {}, stage {:?}",
            self.current_stage_idx,
            self.max_stage_idx,
            self.sstage
        );
        assert_eq!(
            self.max_stage_idx,
            self.tool_type.get_max_stage(self.checking_method),
            "Max stage index: {}, tool type: {:?}, checking method: {:?}",
            self.max_stage_idx,
            self.tool_type,
            self.checking_method
        );
        // TODO not sure about other types - may need to be changed
        if self.sstage != CurrentStage::CollectingFiles {
            assert!(
                self.entries_checked <= self.entries_to_check,
                "Entries checked: {}, entries to check: {}, stage {:?}",
                self.entries_checked,
                self.entries_to_check,
                self.sstage
            );
        }
        let tool_type_checking_method: Option<ToolType> = match self.checking_method {
            CheckingMethod::AudioTags | CheckingMethod::AudioContent => Some(ToolType::SameMusic),
            CheckingMethod::Name | CheckingMethod::SizeName | CheckingMethod::Size | CheckingMethod::Hash => Some(ToolType::Duplicate),
            CheckingMethod::None => None,
        };
        if let Some(tool_type) = tool_type_checking_method {
            assert_eq!(self.tool_type, tool_type, "Tool type: {:?}, checking method: {:?}", self.tool_type, self.checking_method);
        }
        let tool_type_current_stage: Option<ToolType> = match self.sstage {
            CurrentStage::CollectingFiles => None,
            CurrentStage::DuplicateCacheSaving | CurrentStage::DuplicateCacheLoading | CurrentStage::DuplicatePreHashCacheSaving | CurrentStage::DuplicatePreHashCacheLoading => {
                Some(ToolType::Duplicate)
            }
            CurrentStage::DuplicateScanningName
            | CurrentStage::DuplicateScanningSizeName
            | CurrentStage::DuplicateScanningSize
            | CurrentStage::DuplicatePreHashing
            | CurrentStage::DuplicateFullHashing => Some(ToolType::Duplicate),
            CurrentStage::SameMusicCacheLoadingTags
            | CurrentStage::SameMusicCacheSavingTags
            | CurrentStage::SameMusicCacheLoadingFingerprints
            | CurrentStage::SameMusicCacheSavingFingerprints
            | CurrentStage::SameMusicComparingTags
            | CurrentStage::SameMusicReadingTags
            | CurrentStage::SameMusicComparingFingerprints
            | CurrentStage::SameMusicCalculatingFingerprints => Some(ToolType::SameMusic),
            CurrentStage::SimilarImagesCalculatingHashes | CurrentStage::SimilarImagesComparingHashes => Some(ToolType::SimilarImages),
            CurrentStage::SimilarVideosCalculatingHashes => Some(ToolType::SimilarVideos),
            CurrentStage::BrokenFilesChecking => Some(ToolType::BrokenFiles),
            CurrentStage::BadExtensionsChecking => Some(ToolType::BadExtensions),
        };
        if let Some(tool_type) = tool_type_current_stage {
            assert_eq!(self.tool_type, tool_type, "Tool type: {:?}, stage {:?}", self.tool_type, self.sstage);
        }
    }
}

impl ToolType {
    pub fn get_max_stage(&self, checking_method: CheckingMethod) -> u8 {
        match *self {
            ToolType::Duplicate => 6,
            ToolType::EmptyFolders | ToolType::EmptyFiles | ToolType::InvalidSymlinks | ToolType::BigFile | ToolType::TemporaryFiles => 0,
            ToolType::BrokenFiles | ToolType::BadExtensions | ToolType::SimilarVideos => 1,
            ToolType::SimilarImages => 2,
            ToolType::None => unreachable!("ToolType::None is not allowed"),
            ToolType::SameMusic => match checking_method {
                CheckingMethod::AudioTags => 4,
                CheckingMethod::AudioContent => 7,
                _ => unreachable!("CheckingMethod {checking_method:?} in same music mode is not allowed"),
            },
        }
    }
}
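
// Editor's note: consistency sketch (editorial, not upstream). The values returned by
// `get_max_stage` mirror the stage tables at the top of this file, e.g. hash-based duplicate
// scanning reports stages 0..=6 and content-based music scanning reports stages 0..=7.
#[cfg(test)]
mod editor_stage_table_sketch {
    use super::*;

    #[test]
    fn max_stages_match_stage_tables() {
        assert_eq!(ToolType::Duplicate.get_max_stage(CheckingMethod::Hash), 6);
        assert_eq!(ToolType::SameMusic.get_max_stage(CheckingMethod::AudioContent), 7);
    }
}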
impl CurrentStage {
    pub fn get_current_stage(&self) -> u8 {
        #[allow(clippy::match_same_arms)] // Now it is easier to read
        match self {
            CurrentStage::CollectingFiles => 0,
            CurrentStage::DuplicateScanningName => 0,
            CurrentStage::DuplicateScanningSizeName => 0,
            CurrentStage::DuplicateScanningSize => 0,
            CurrentStage::DuplicatePreHashCacheLoading => 1,
            CurrentStage::DuplicatePreHashing => 2,
            CurrentStage::DuplicatePreHashCacheSaving => 3,
            CurrentStage::DuplicateCacheLoading => 4,
            CurrentStage::DuplicateFullHashing => 5,
            CurrentStage::DuplicateCacheSaving => 6,
            CurrentStage::SimilarImagesCalculatingHashes => 1,
            CurrentStage::SimilarImagesComparingHashes => 2,
            CurrentStage::SimilarVideosCalculatingHashes => 1,
            CurrentStage::BrokenFilesChecking => 1,
            CurrentStage::BadExtensionsChecking => 1,
            CurrentStage::SameMusicCacheLoadingTags => 1,
            CurrentStage::SameMusicReadingTags => 2,
            CurrentStage::SameMusicCacheSavingTags => 3,
            CurrentStage::SameMusicComparingTags => 4,
            CurrentStage::SameMusicCacheLoadingFingerprints => 4,
            CurrentStage::SameMusicCalculatingFingerprints => 5,
            CurrentStage::SameMusicCacheSavingFingerprints => 6,
            CurrentStage::SameMusicComparingFingerprints => 7,
        }
    }

    pub fn check_if_loading_saving_cache(&self) -> bool {
        self.check_if_saving_cache() || self.check_if_loading_cache()
    }
    pub fn check_if_loading_cache(&self) -> bool {
        matches!(
            self,
            CurrentStage::SameMusicCacheLoadingFingerprints | CurrentStage::SameMusicCacheLoadingTags | CurrentStage::DuplicateCacheLoading | CurrentStage::DuplicatePreHashCacheLoading
        )
    }
    pub fn check_if_saving_cache(&self) -> bool {
        matches!(
            self,
            CurrentStage::SameMusicCacheSavingFingerprints | CurrentStage::SameMusicCacheSavingTags | CurrentStage::DuplicateCacheSaving | CurrentStage::DuplicatePreHashCacheSaving
        )
    }
}
czkawka_core-8.0.0/src/same_music.rs000064400000000000000000001232001046102023000155640ustar 00000000000000
use std::collections::{BTreeMap, HashSet};
use std::fs::File;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::{mem, panic};

use anyhow::Context;
use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use lofty::file::{AudioFile, TaggedFileExt};
use lofty::prelude::*;
use lofty::read_from;
use log::debug;
use rayon::prelude::*;
use rusty_chromaprint::{match_fingerprints, Configuration, Fingerprinter};
use serde::{Deserialize, Serialize};
use symphonia::core::audio::SampleBuffer;
use symphonia::core::codecs::{DecoderOptions, CODEC_TYPE_NULL};
use symphonia::core::formats::FormatOptions;
use symphonia::core::io::MediaSourceStream;
use symphonia::core::meta::MetadataOptions;
use symphonia::core::probe::Hint;

use crate::common::{
    check_if_stop_received, create_crash_message, delete_files_custom, filter_reference_folders_generic, prepare_thread_handler_common,
    send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
};
use crate::common_cache::{extract_loaded_cache, get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
use crate::progress_data::{CurrentStage, ProgressData};

bitflags! {
    #[derive(PartialEq, Copy, Clone, Debug)]
    pub struct MusicSimilarity : u32 {
        const NONE = 0;

        const TRACK_TITLE = 0b1;
        const TRACK_ARTIST = 0b10;
        const YEAR = 0b100;
        const LENGTH = 0b1000;
        const GENRE = 0b10000;
        const BITRATE = 0b10_0000;
    }
}
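
// Editor's note: usage sketch (editorial, not upstream). The similarity criteria combine as
// ordinary bitflags, e.g. requiring both a matching title and a matching artist.
#[allow(dead_code)]
fn example_similarity_flags() -> MusicSimilarity {
    MusicSimilarity::TRACK_TITLE | MusicSimilarity::TRACK_ARTIST
}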
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct MusicEntry {
    pub size: u64,
    pub path: PathBuf,
    pub modified_date: u64,
    pub fingerprint: Vec<u32>,

    pub track_title: String,
    pub track_artist: String,
    pub year: String,
    pub length: String,
    pub genre: String,
    pub bitrate: u32,
}

impl ResultEntry for MusicEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

impl FileEntry {
    fn into_music_entry(self) -> MusicEntry {
        MusicEntry {
            size: self.size,
            path: self.path,
            modified_date: self.modified_date,
            fingerprint: vec![],
            track_title: String::new(),
            track_artist: String::new(),
            year: String::new(),
            length: String::new(),
            genre: String::new(),
            bitrate: 0,
        }
    }
}

struct GroupedFilesToCheck {
    pub base_files: Vec<MusicEntry>,
    pub files_to_compare: Vec<MusicEntry>,
}

#[derive(Default)]
pub struct Info {
    pub number_of_duplicates: usize,
    pub number_of_groups: u64,
}

pub struct SameMusicParameters {
    pub music_similarity: MusicSimilarity,
    pub approximate_comparison: bool,
    pub check_type: CheckingMethod,
    pub minimum_segment_duration: f32,
    pub maximum_difference: f64,
    pub compare_fingerprints_only_with_similar_titles: bool,
}

impl SameMusicParameters {
    pub fn new(
        music_similarity: MusicSimilarity,
        approximate_comparison: bool,
        check_type: CheckingMethod,
        minimum_segment_duration: f32,
        maximum_difference: f64,
        compare_fingerprints_only_with_similar_titles: bool,
    ) -> Self {
        assert!(!music_similarity.is_empty());
        assert!([CheckingMethod::AudioTags, CheckingMethod::AudioContent].contains(&check_type));
        Self {
            music_similarity,
            approximate_comparison,
            check_type,
            minimum_segment_duration,
            maximum_difference,
            compare_fingerprints_only_with_similar_titles,
        }
    }
}
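
// Editor's note: construction sketch (editorial, not upstream). The concrete values are
// purely illustrative; the assertions in `new` only require a non-empty similarity set and
// one of the two audio checking methods.
#[allow(dead_code)]
fn example_same_music_params() -> SameMusicParameters {
    SameMusicParameters::new(
        MusicSimilarity::TRACK_TITLE | MusicSimilarity::TRACK_ARTIST,
        true,                      // approximate (simplified) title/artist comparison
        CheckingMethod::AudioTags, // compare tags rather than audio fingerprints
        10.0,                      // minimum matched segment duration (illustrative value)
        2.0,                       // maximum allowed fingerprint difference (illustrative value)
        false,                     // do not restrict fingerprinting to similar titles
    )
}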
pub struct SameMusic {
    common_data: CommonToolData,
    information: Info,
    music_to_check: BTreeMap<String, MusicEntry>,
    music_entries: Vec<MusicEntry>,
    duplicated_music_entries: Vec<Vec<MusicEntry>>,
    duplicated_music_entries_referenced: Vec<(MusicEntry, Vec<MusicEntry>)>,
    hash_preset_config: Configuration,
    params: SameMusicParameters,
}

impl SameMusic {
    pub fn new(params: SameMusicParameters) -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::SameMusic),
            information: Info::default(),
            music_entries: Vec::with_capacity(2048),
            duplicated_music_entries: vec![],
            music_to_check: Default::default(),
            duplicated_music_entries_referenced: vec![],
            hash_preset_config: Configuration::preset_test1(), // TODO allow to change this and move to parameters
            params,
        }
    }

    #[fun_time(message = "find_same_music", level = "info")]
    pub fn find_same_music(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty();
        if !self.check_files(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        match self.params.check_type {
            CheckingMethod::AudioTags => {
                if !self.read_tags(stop_receiver, progress_sender) {
                    self.common_data.stopped_search = true;
                    return;
                }
                if !self.check_for_duplicate_tags(stop_receiver, progress_sender) {
                    self.common_data.stopped_search = true;
                    return;
                }
            }
            CheckingMethod::AudioContent => {
                if !self.read_tags(stop_receiver, progress_sender) {
                    self.common_data.stopped_search = true;
                    return;
                }
                if !self.calculate_fingerprint(stop_receiver, progress_sender) {
                    self.common_data.stopped_search = true;
                    return;
                }
                if !self.check_for_duplicate_fingerprints(stop_receiver, progress_sender) {
                    self.common_data.stopped_search = true;
                    return;
                }
            }
            _ => panic!(),
        }
        self.delete_files();
        self.debug_print();
    }

    #[fun_time(message = "check_files", level = "debug")]
    fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        self.common_data.extensions.set_and_validate_allowed_extensions(AUDIO_FILES_EXTENSIONS);
        if !self.common_data.extensions.set_any_extensions() {
            return true;
        }

        let result = DirTraversalBuilder::new()
            .group_by(|_fe| ())
            .stop_receiver(stop_receiver)
            .progress_sender(progress_sender)
            .common_data(&self.common_data)
            .checking_method(self.params.check_type)
            .build()
            .run();

        match result {
            DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                self.music_to_check = grouped_file_entries
                    .into_values()
                    .flatten()
                    .map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_music_entry()))
                    .collect();
                self.common_data.text_messages.warnings.extend(warnings);
                debug!("check_files - Found {} music files.", self.music_to_check.len());
                true
            }
            DirTraversalResult::Stopped => false,
        }
    }

    #[fun_time(message = "load_cache", level = "debug")]
    fn load_cache(&mut self, checking_tags: bool) -> (BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>) {
        let loaded_hash_map;

        let mut records_already_cached: BTreeMap<String, MusicEntry> = Default::default();
        let mut non_cached_files_to_check: BTreeMap<String, MusicEntry> = Default::default();

        if self.common_data.use_cache {
            let (messages, loaded_items) =
                load_cache_from_file_generalized_by_path::<MusicEntry>(&get_similar_music_cache_file(checking_tags), self.get_delete_outdated_cache(), &self.music_to_check);
            self.get_text_messages_mut().extend_with_another_messages(messages);
            loaded_hash_map = loaded_items.unwrap_or_default();

            debug!("load_cache - Starting to check for differences");
            extract_loaded_cache(
                &loaded_hash_map,
                mem::take(&mut self.music_to_check),
                &mut records_already_cached,
                &mut non_cached_files_to_check,
            );
            debug!(
                "load_cache - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached",
                non_cached_files_to_check.len(),
                format_size(non_cached_files_to_check.values().map(|e| e.size).sum::<u64>(), BINARY),
                records_already_cached.len(),
                format_size(records_already_cached.values().map(|e| e.size).sum::<u64>(), BINARY),
            );
        } else {
            loaded_hash_map = Default::default();
            mem::swap(&mut self.music_to_check, &mut non_cached_files_to_check);
        }
        (loaded_hash_map, records_already_cached, non_cached_files_to_check)
    }

    #[fun_time(message = "save_cache", level = "debug")]
    fn save_cache(&mut self, vec_file_entry: Vec<MusicEntry>, loaded_hash_map: BTreeMap<String, MusicEntry>, checking_tags: bool) {
        if !self.common_data.use_cache {
            return;
        }
        // All results must be saved to the file: entries previously loaded from it plus everything computed in this run
        let mut all_results: BTreeMap<String, MusicEntry> = loaded_hash_map;
        for file_entry in vec_file_entry {
            all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
        }
        let messages = save_cache_to_file_generalized(&get_similar_music_cache_file(checking_tags), &all_results, self.common_data.save_also_as_json, 0);
        self.get_text_messages_mut().extend_with_another_messages(messages);
    }
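
    // Editor's note (editorial summary): the cache flow above is load -> split -> merge.
    // `load_cache` partitions the candidate files into entries already present in the cache
    // and files that still need work, while `save_cache` later merges freshly computed
    // entries back into the previously loaded map before writing it out, so the cache only
    // ever grows unless outdated entries are explicitly dropped.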
    #[fun_time(message = "calculate_fingerprint", level = "debug")]
    fn calculate_fingerprint(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.music_entries.is_empty() {
            return true;
        }

        // We only calculate fingerprints for files with similar titles.
        // This saves a lot of time, because we don't need to calculate and later compare fingerprints of files with different titles.
        if self.params.compare_fingerprints_only_with_similar_titles {
            let grouped_by_title: BTreeMap<String, Vec<MusicEntry>> = Self::get_entries_grouped_by_title(mem::take(&mut self.music_entries));
            self.music_to_check = grouped_by_title
                .into_iter()
                .filter_map(|(_title, entries)| if entries.len() >= 2 { Some(entries) } else { None })
                .flatten()
                .map(|e| (e.path.to_string_lossy().to_string(), e))
                .collect();
        } else {
            self.music_to_check = mem::take(&mut self.music_entries).into_iter().map(|e| (e.path.to_string_lossy().to_string(), e)).collect();
        }

        let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicCacheLoadingFingerprints, 0, self.get_test_type());

        let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(false);

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        if check_if_stop_received(stop_receiver) {
            return false;
        }

        let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common(
            progress_sender,
            CurrentStage::SameMusicCalculatingFingerprints,
            non_cached_files_to_check.len(),
            self.get_test_type(),
        );

        let configuration = &self.hash_preset_config;
        debug!("calculate_fingerprint - starting fingerprinting");
        let mut vec_file_entry = non_cached_files_to_check
            .into_par_iter()
            .map(|(path, mut music_entry)| {
                atomic_counter.fetch_add(1, Ordering::Relaxed);
                if check_if_stop_received(stop_receiver) {
                    check_was_stopped.store(true, Ordering::Relaxed);
                    return None;
                }
                let Ok(fingerprint) = calc_fingerprint_helper(path, configuration) else {
                    return Some(None);
                };

                music_entry.fingerprint = fingerprint;
                Some(Some(music_entry))
            })
            .while_some()
            .flatten()
            .collect::<Vec<_>>();
        debug!("calculate_fingerprint - ended fingerprinting");

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicCacheSavingFingerprints, 0, self.get_test_type());

        // Just connect loaded results with already calculated ones
        vec_file_entry.extend(records_already_cached.into_values());

        self.music_entries = vec_file_entry.clone();
        self.save_cache(vec_file_entry, loaded_hash_map, false);

        // Break if stop was clicked after saving to cache
        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
        if check_was_stopped.load(Ordering::Relaxed) || check_if_stop_received(stop_receiver) {
            return false;
        }

        true
    }
    #[fun_time(message = "read_tags", level = "debug")]
    fn read_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.music_to_check.is_empty() {
            return true;
        }

        let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicCacheLoadingTags, 0, self.get_test_type());

        let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(true);

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        if check_if_stop_received(stop_receiver) {
            return false;
        }

        let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicReadingTags, non_cached_files_to_check.len(), self.get_test_type());

        debug!("read_tags - starting reading tags");
        // Clean for duplicate files
        let mut vec_file_entry = non_cached_files_to_check
            .into_par_iter()
            .map(|(path, mut music_entry)| {
                atomic_counter.fetch_add(1, Ordering::Relaxed);
                if check_if_stop_received(stop_receiver) {
                    check_was_stopped.store(true, Ordering::Relaxed);
                    return None;
                }
                if read_single_file_tags(&path, &mut music_entry) {
                    Some(Some(music_entry))
                } else {
                    Some(None)
                }
            })
            .while_some()
            .flatten()
            .collect::<Vec<_>>();
        debug!("read_tags - ended reading tags");

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicCacheSavingTags, 0, self.get_test_type());

        // Just connect loaded results with already calculated ones
        vec_file_entry.extend(records_already_cached.into_values());

        self.music_entries = vec_file_entry.clone();
        self.save_cache(vec_file_entry, loaded_hash_map, true);

        // Break if stop was clicked after saving to cache
        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
        if check_was_stopped.load(Ordering::Relaxed) {
            return false;
        }

        true
    }
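
    // Editor's note (illustrative walkthrough, not upstream text): tag-based duplicate
    // detection below works by progressive re-partitioning. It starts from a single group
    // holding every file; for each enabled criterion (title, artist, year, length, genre,
    // bitrate) every group is split by that tag's value and only sub-groups with at least
    // two files survive. With title and artist enabled, for example, two files must agree
    // on both tags to remain grouped together.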
    #[fun_time(message = "check_for_duplicate_tags", level = "debug")]
    fn check_for_duplicate_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.music_entries.is_empty() {
            return true;
        }
        let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicComparingTags, self.music_entries.len(), self.get_test_type());

        let mut old_duplicates: Vec<Vec<MusicEntry>> = vec![self.music_entries.clone()];
        let mut new_duplicates: Vec<Vec<MusicEntry>> = Vec::new();

        if (self.params.music_similarity & MusicSimilarity::TRACK_TITLE) == MusicSimilarity::TRACK_TITLE {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.track_title, self.params.approximate_comparison);
        }
        if (self.params.music_similarity & MusicSimilarity::TRACK_ARTIST) == MusicSimilarity::TRACK_ARTIST {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.track_artist, self.params.approximate_comparison);
        }
        if (self.params.music_similarity & MusicSimilarity::YEAR) == MusicSimilarity::YEAR {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.year, false);
        }
        if (self.params.music_similarity & MusicSimilarity::LENGTH) == MusicSimilarity::LENGTH {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.length, false);
        }
        if (self.params.music_similarity & MusicSimilarity::GENRE) == MusicSimilarity::GENRE {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.genre, false);
        }
        if (self.params.music_similarity & MusicSimilarity::BITRATE) == MusicSimilarity::BITRATE {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }
            let old_duplicates_len = old_duplicates.len();
            for vec_file_entry in old_duplicates {
                let mut hash_map: BTreeMap<String, Vec<MusicEntry>> = Default::default();
                for file_entry in vec_file_entry {
                    if file_entry.bitrate != 0 {
                        let thing = file_entry.bitrate.to_string();
                        if !thing.is_empty() {
                            hash_map.entry(thing.clone()).or_default().push(file_entry);
                        }
                    }
                }
                for (_title, vec_file_entry) in hash_map {
                    if vec_file_entry.len() > 1 {
                        new_duplicates.push(vec_file_entry);
                    }
                }
            }
            atomic_counter.fetch_add(old_duplicates_len, Ordering::Relaxed);
            old_duplicates = new_duplicates;
        }

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        self.duplicated_music_entries = old_duplicates;

        if self.common_data.use_reference_folders {
            self.duplicated_music_entries_referenced = filter_reference_folders_generic(mem::take(&mut self.duplicated_music_entries), &self.common_data.directories);
        }

        if self.common_data.use_reference_folders {
            for (_fe, vector) in &self.duplicated_music_entries_referenced {
                self.information.number_of_duplicates += vector.len();
                self.information.number_of_groups += 1;
            }
        } else {
            for vector in &self.duplicated_music_entries {
                self.information.number_of_duplicates += vector.len() - 1;
                self.information.number_of_groups += 1;
            }
        }

        // Clear unused data
        self.music_entries.clear();

        true
    }

    fn split_fingerprints_to_base_and_files_to_compare(&self, music_data: Vec<MusicEntry>) -> (Vec<MusicEntry>, Vec<MusicEntry>) {
        if self.common_data.use_reference_folders {
            music_data.into_iter().partition(|f| self.common_data.directories.is_in_referenced_directory(f.get_path()))
        } else {
            (music_data.clone(), music_data)
        }
    }
    fn get_entries_grouped_by_title(music_data: Vec<MusicEntry>) -> BTreeMap<String, Vec<MusicEntry>> {
        let mut entries_grouped_by_title: BTreeMap<String, Vec<MusicEntry>> = BTreeMap::new();
        for entry in music_data {
            let simplified_track_title = get_simplified_name(&entry.track_title);
            // TODO maybe add an option to also check entries with empty titles?
            if simplified_track_title.is_empty() {
                continue;
            }
            entries_grouped_by_title.entry(simplified_track_title).or_default().push(entry);
        }
        entries_grouped_by_title
    }

    fn split_fingerprints_to_check(&mut self) -> Vec<GroupedFilesToCheck> {
        if self.params.compare_fingerprints_only_with_similar_titles {
            let entries_grouped_by_title: BTreeMap<String, Vec<MusicEntry>> = Self::get_entries_grouped_by_title(mem::take(&mut self.music_entries));

            entries_grouped_by_title
                .into_iter()
                .filter_map(|(_title, entries)| {
                    let (base_files, files_to_compare) = self.split_fingerprints_to_base_and_files_to_compare(entries);

                    // When base_files or files_to_compare is empty there is nothing to compare, so the group is removed from the list.
                    // The same applies when both sides contain only the same single file.
                    if base_files.is_empty() || files_to_compare.is_empty() || (base_files.len() == 1 && files_to_compare.len() == 1 && (base_files[0].path == files_to_compare[0].path)) {
                        return None;
                    }

                    Some(GroupedFilesToCheck { base_files, files_to_compare })
                })
                .collect()
        } else {
            let entries = mem::take(&mut self.music_entries);
            let (base_files, files_to_compare) = self.split_fingerprints_to_base_and_files_to_compare(entries);
            vec![GroupedFilesToCheck { base_files, files_to_compare }]
        }
    }

    #[fun_time(message = "compare_fingerprints", level = "debug")]
    fn compare_fingerprints(
        &mut self,
        stop_receiver: Option<&Receiver<()>>,
        atomic_counter: &Arc<AtomicUsize>,
        base_files: Vec<MusicEntry>,
        files_to_compare: &[MusicEntry],
    ) -> Option<Vec<Vec<MusicEntry>>> {
        let mut used_paths: HashSet<String> = Default::default();

        let configuration = &self.hash_preset_config;
        let minimum_segment_duration = self.params.minimum_segment_duration;
        let maximum_difference = self.params.maximum_difference;

        let mut duplicated_music_entries = Vec::new();

        for f_entry in base_files {
            atomic_counter.fetch_add(1, Ordering::Relaxed);
            if check_if_stop_received(stop_receiver) {
                return None;
            }

            let f_string = f_entry.path.to_string_lossy().to_string();
            if used_paths.contains(&f_string) {
                continue;
            }

            let (mut collected_similar_items, errors): (Vec<_>, Vec<_>) = files_to_compare
                .par_iter()
                .map(|e_entry| {
                    let e_string = e_entry.path.to_string_lossy().to_string();
                    if used_paths.contains(&e_string) || e_string == f_string {
                        return None;
                    }
                    let mut segments = match match_fingerprints(&f_entry.fingerprint, &e_entry.fingerprint, configuration) {
                        Ok(segments) => segments,
                        Err(e) => return Some(Err(format!("Error while comparing fingerprints: {e}"))),
                    };
                    segments.retain(|s| s.duration(configuration) > minimum_segment_duration && s.score < maximum_difference);
                    if segments.is_empty() {
                        None
                    } else {
                        Some(Ok((e_string, e_entry)))
                    }
                })
                .flatten()
                .partition_map(|res| match res {
                    Ok(entry) => itertools::Either::Left(entry),
                    Err(err) => itertools::Either::Right(err),
                });

            self.common_data.text_messages.errors.extend(errors);

            collected_similar_items.retain(|(path, _entry)| !used_paths.contains(path));
            if !collected_similar_items.is_empty() {
                let mut music_entries = Vec::new();
                for (path, entry) in collected_similar_items {
                    used_paths.insert(path);
                    music_entries.push(entry.clone());
                }
                used_paths.insert(f_string);
                music_entries.push(f_entry);
                duplicated_music_entries.push(music_entries);
            }
        }
        Some(duplicated_music_entries)
    }
    #[fun_time(message = "check_for_duplicate_fingerprints", level = "debug")]
    fn check_for_duplicate_fingerprints(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if self.music_entries.is_empty() {
            return true;
        }
        let grouped_files_to_check = self.split_fingerprints_to_check();
        let base_files_number = grouped_files_to_check.iter().map(|g| g.base_files.len()).sum::<usize>();

        let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::SameMusicComparingFingerprints, base_files_number, self.get_test_type());

        let mut duplicated_music_entries = Vec::new();
        for group in grouped_files_to_check {
            let GroupedFilesToCheck { base_files, files_to_compare } = group;
            let Some(temp_music_entries) = self.compare_fingerprints(stop_receiver, &atomic_counter, base_files, &files_to_compare) else {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            };
            duplicated_music_entries.extend(temp_music_entries);
        }

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        self.duplicated_music_entries = duplicated_music_entries;

        if self.common_data.use_reference_folders {
            self.duplicated_music_entries_referenced = filter_reference_folders_generic(mem::take(&mut self.duplicated_music_entries), &self.common_data.directories);
        }

        if self.common_data.use_reference_folders {
            for (_fe, vector) in &self.duplicated_music_entries_referenced {
                self.information.number_of_duplicates += vector.len();
                self.information.number_of_groups += 1;
            }
        } else {
            for vector in &self.duplicated_music_entries {
                self.information.number_of_duplicates += vector.len() - 1;
                self.information.number_of_groups += 1;
            }
        }

        // Clear unused data
        self.music_entries.clear();

        true
    }

    #[fun_time(message = "check_music_item", level = "debug")]
    fn check_music_item(
        &self,
        old_duplicates: Vec<Vec<MusicEntry>>,
        atomic_counter: &Arc<AtomicUsize>,
        get_item: fn(&MusicEntry) -> &str,
        approximate_comparison: bool,
    ) -> Vec<Vec<MusicEntry>> {
        let mut new_duplicates: Vec<_> = Default::default();
        let old_duplicates_len = old_duplicates.len();
        for vec_file_entry in old_duplicates {
            let mut hash_map: BTreeMap<String, Vec<MusicEntry>> = Default::default();
            for file_entry in vec_file_entry {
                let mut thing = get_item(&file_entry).trim().to_lowercase();
                if approximate_comparison {
                    thing = get_simplified_name(&thing);
                }
                if !thing.is_empty() {
                    hash_map.entry(thing).or_default().push(file_entry);
                }
            }
            for (_title, vec_file_entry) in hash_map {
                if vec_file_entry.len() > 1 {
                    new_duplicates.push(vec_file_entry);
                }
            }
        }
        atomic_counter.fetch_add(old_duplicates_len, Ordering::Relaxed);

        new_duplicates
    }

    #[fun_time(message = "delete_files", level = "debug")]
    fn delete_files(&mut self) {
        if self.common_data.delete_method == DeleteMethod::None {
            return;
        }
        let vec_files = self.duplicated_music_entries.iter().collect::<Vec<_>>();
        delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
    }
}

impl SameMusic {
    pub const fn get_duplicated_music_entries(&self) -> &Vec<Vec<MusicEntry>> {
        &self.duplicated_music_entries
    }

    pub fn get_params(&self) -> &SameMusicParameters {
        &self.params
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }

    pub fn get_similar_music_referenced(&self) -> &Vec<(MusicEntry, Vec<MusicEntry>)> {
        &self.duplicated_music_entries_referenced
    }

    pub fn get_number_of_base_duplicated_files(&self) -> usize {
        if self.common_data.use_reference_folders {
            self.duplicated_music_entries_referenced.len()
        } else {
            self.duplicated_music_entries.len()
        }
    }

    pub fn get_use_reference(&self) -> bool {
        self.common_data.use_reference_folders
    }
}
// TODO this should be taken from the rusty-chromaprint repo, not reimplemented here
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
    let path = path.as_ref();
    let src = File::open(path).context("failed to open file")?;
    let mss = MediaSourceStream::new(Box::new(src), Default::default());

    let mut hint = Hint::new();
    if let Some(ext) = path.extension().and_then(std::ffi::OsStr::to_str) {
        hint.with_extension(ext);
    }

    let meta_opts: MetadataOptions = Default::default();
    let fmt_opts: FormatOptions = Default::default();

    let probed = symphonia::default::get_probe().format(&hint, mss, &fmt_opts, &meta_opts).context("unsupported format")?;
    let mut format = probed.format;

    let track = format
        .tracks()
        .iter()
        .find(|t| t.codec_params.codec != CODEC_TYPE_NULL)
        .context("no supported audio tracks")?;

    let dec_opts: DecoderOptions = Default::default();

    let mut decoder = symphonia::default::get_codecs().make(&track.codec_params, &dec_opts).context("unsupported codec")?;

    let track_id = track.id;

    let mut printer = Fingerprinter::new(config);
    let sample_rate = track.codec_params.sample_rate.context("missing sample rate")?;
    let channels = track.codec_params.channels.context("missing audio channels")?.count() as u32;
    printer.start(sample_rate, channels).context("initializing fingerprinter")?;

    let mut sample_buf = None;
    loop {
        let Ok(packet) = format.next_packet() else {
            break;
        };
        if packet.track_id() != track_id {
            continue;
        }
        match decoder.decode(&packet) {
            Ok(audio_buf) => {
                if sample_buf.is_none() {
                    let spec = *audio_buf.spec();
                    let duration = audio_buf.capacity() as u64;
                    sample_buf = Some(SampleBuffer::<f32>::new(duration, spec));
                }
                if let Some(buf) = &mut sample_buf {
                    buf.copy_interleaved_ref(audio_buf);
                    printer.consume(buf.samples());
                }
            }
            Err(symphonia::core::errors::Error::DecodeError(_)) => (),
            Err(_) => break,
        }
    }

    printer.finish();
    Ok(printer.fingerprint().to_vec())
}

fn read_single_file_tags(path: &str, music_entry: &mut MusicEntry) -> bool {
    let Ok(mut file) = File::open(path) else { return false };

    let Ok(possible_tagged_file) = panic::catch_unwind(move || {
        match read_from(&mut file) {
            Ok(t) => Some(t),
            Err(_inspected) => {
                // println!("Failed to open {}", path);
                None
            }
        }
    }) else {
        // NB: upstream points this crash message at the image-rs issue tracker, which looks like a copy-paste leftover
        let message = create_crash_message("Lofty", path, "https://github.com/image-rs/image/issues");
        println!("{message}");
        return false;
    };

    let Some(tagged_file) = possible_tagged_file else { return true };

    let properties = tagged_file.properties();

    let mut track_title = String::new();
    let mut track_artist = String::new();
    let mut year = String::new();
    let mut genre = String::new();

    let bitrate = properties.audio_bitrate().unwrap_or(0);
    let mut length = properties.duration().as_millis().to_string();

    if let Some(tag) = tagged_file.primary_tag() {
        track_title = tag.get_string(&ItemKey::TrackTitle).unwrap_or("").to_string();
        track_artist = tag.get_string(&ItemKey::TrackArtist).unwrap_or("").to_string();
        year = tag.get_string(&ItemKey::Year).unwrap_or("").to_string();
        genre = tag.get_string(&ItemKey::Genre).unwrap_or("").to_string();
    }

    for tag in tagged_file.tags() {
        if track_title.is_empty() {
            if let Some(tag_value) = tag.get_string(&ItemKey::TrackTitle) {
                track_title = tag_value.to_string();
            }
        }
        if track_artist.is_empty() {
            if let Some(tag_value) = tag.get_string(&ItemKey::TrackArtist) {
                track_artist = tag_value.to_string();
            }
        }
        if year.is_empty() {
            if let Some(tag_value) = tag.get_string(&ItemKey::Year) {
                year = tag_value.to_string();
            }
        }
        if genre.is_empty() {
            if let Some(tag_value) = tag.get_string(&ItemKey::Genre) {
                genre = tag_value.to_string();
            }
        }
    }
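
    // Editor's note (worked example of the arithmetic below): `length` holds milliseconds.
    // Dividing by 60 rescales the value so that /1000 and %1000 split off the minutes, and
    // *6/100 converts the remainder back into whole seconds. For 83_456 ms:
    // 83_456 / 60 = 1390; minutes = 1390 / 1000 = 1; seconds = (1390 % 1000) * 6 / 100 = 23,
    // giving "1:23".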
        let minutes = length_number / 1000;
        let seconds = (length_number % 1000) * 6 / 100;
        if minutes != 0 || seconds != 0 {
            length = format!("{minutes}:{seconds:02}");
        } else if old_length_number > 0 {
            // The audio is shorter than a second, but not zero-length
            length = "0:01".to_string();
        } else {
            length = String::new();
        }
    } else {
        length = String::new();
    }
    music_entry.track_title = track_title;
    music_entry.track_artist = track_artist;
    music_entry.year = year;
    music_entry.length = length;
    music_entry.genre = genre;
    music_entry.bitrate = bitrate;
    true
}

impl DebugPrint for SameMusic {
    #[fun_time(message = "debug_print", level = "debug")]
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }
        println!("---------------DEBUG PRINT---------------");
        println!("Found files music - {}", self.music_entries.len());
        println!("Found duplicated files music - {}", self.duplicated_music_entries.len());
        self.debug_print_common();
        println!("-----------------------------------------");
    }
}

impl PrintResults for SameMusic {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        if !self.duplicated_music_entries.is_empty() {
            writeln!(writer, "{} music files which have similar friends\n\n", self.duplicated_music_entries.len())?;
            for vec_file_entry in &self.duplicated_music_entries {
                writeln!(writer, "Found {} music files which have similar friends", vec_file_entry.len())?;
                for file_entry in vec_file_entry {
                    write_music_entry(writer, file_entry)?;
                }
                writeln!(writer)?;
            }
        } else if !self.duplicated_music_entries_referenced.is_empty() {
            writeln!(writer, "{} music files which have similar friends\n\n", self.duplicated_music_entries_referenced.len())?;
            for (file_entry, vec_file_entry) in &self.duplicated_music_entries_referenced {
                writeln!(writer, "Found {} music files which have similar friends", vec_file_entry.len())?;
                writeln!(writer)?;
                write_music_entry(writer, file_entry)?;
                for file_entry in vec_file_entry {
                    write_music_entry(writer, file_entry)?;
                }
                writeln!(writer)?;
            }
        } else {
            write!(writer, "Not found any similar music files.")?;
        }
        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        if self.get_use_reference() {
            self.save_results_to_file_as_json_internal(file_name, &self.duplicated_music_entries_referenced, pretty_print)
        } else {
            self.save_results_to_file_as_json_internal(file_name, &self.duplicated_music_entries, pretty_print)
        }
    }
}

fn write_music_entry<T: Write>(writer: &mut T, file_entry: &MusicEntry) -> std::io::Result<()> {
    writeln!(
        writer,
        "TT: {} - TA: {} - Y: {} - L: {} - G: {} - B: {} - P: \"{}\"",
        file_entry.track_title, file_entry.track_artist, file_entry.year, file_entry.length, file_entry.genre, file_entry.bitrate, file_entry.path.to_string_lossy()
    )
}

fn get_simplified_name(what: &str) -> String {
    let mut new_what = String::with_capacity(what.len());
    let mut tab_number = 0;
    let mut space_before = true;
    for character in what.chars() {
        match character {
            '(' | '[' => {
                tab_number += 1;
            }
            ')' | ']' => {
                if tab_number == 0 {
                    // Unbalanced closing bracket - nothing to do, do not even save it to the output
                } else {
                    tab_number -= 1;
                }
            }
            ' ' => {
                if !space_before {
                    new_what.push(' ');
                    space_before = true;
                }
            }
            ch => {
                if tab_number == 0 {
                    // Ignore all non-alphabetic ASCII characters, like '"' or '.'
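                    // Illustrative behavior on sample inputs (taken from the tests below):
                    //   "roman ( ziemniak ) "                 -> "roman"
                    //   "Kekistan (feat. roman) [Mix on Mix]" -> "Kekistan"
                    //   " fsf.f. "                            -> "fsf f"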
                    if !ch.is_ascii() || ch.is_ascii_alphabetic() {
                        space_before = false;
                        new_what.push(ch);
                    } else if !space_before {
                        new_what.push(' ');
                        space_before = true;
                    }
                }
            }
        }
    }
    if new_what.ends_with(' ') {
        new_what.pop();
    }
    new_what
}

impl CommonData for SameMusic {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
    fn get_check_method(&self) -> CheckingMethod {
        self.get_params().check_type
    }
}

#[cfg(test)]
mod tests {
    use crate::same_music::get_simplified_name;

    #[test]
    fn test_strings() {
        let what = "roman ( ziemniak ) ".to_string();
        let res = get_simplified_name(&what);
        assert_eq!(res, "roman");

        let what = " HH) ".to_string();
        let res = get_simplified_name(&what);
        assert_eq!(res, "HH");

        let what = " fsf.f. ".to_string();
        let res = get_simplified_name(&what);
        assert_eq!(res, "fsf f");

        let what = "Kekistan (feat. roman) [Mix on Mix]".to_string();
        let res = get_simplified_name(&what);
        assert_eq!(res, "Kekistan");
    }
}
czkawka_core-8.0.0/src/similar_images.rs000064400000000000000000001624701046102023000164270ustar 00000000000000use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;
use std::time::SystemTime;
use std::{mem, panic};

use bk_tree::BKTree;
use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use image::GenericImageView;
use image_hasher::{FilterType, HashAlg, HasherConfig};
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use crate::common::{
    check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS,
    IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, JXL_IMAGE_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
};
use crate::common_cache::{extract_loaded_cache, get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{inode, take_1_per_inode, DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType};
use crate::common_image::get_dynamic_image_from_path;
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
use crate::progress_data::{CurrentStage, ProgressData};

type ImHash = Vec<u8>;

// 40 is useless here, similarly to how the previous 20 was for hash size 8,
// but since Krowka has problems with properly changing the max value on the fly,
// hardcode 40 as the max value
pub const SIMILAR_VALUES: [[u32; 6]; 4] = [
    [1, 2, 5, 7, 14, 40],    // 8
    [2, 5, 15, 30, 40, 40],  // 16
    [4, 10, 20, 40, 40, 40], // 32
    [6, 20, 40, 40, 40, 40], // 64
];

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ImagesEntry {
    pub path: PathBuf,
    pub size: u64,
    pub width: u32,
    pub height: u32,
    pub modified_date: u64,
    pub hash: ImHash,
    pub similarity: u32,
}

impl ResultEntry for ImagesEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

impl FileEntry {
    fn into_images_entry(self) -> ImagesEntry {
        ImagesEntry {
            size: self.size,
            path: self.path,
            modified_date: self.modified_date,
            width: 0,
            height: 0,
            hash: Vec::new(),
            similarity: 0,
        }
    }
}

#[derive(Clone, Debug, Copy)]
pub enum SimilarityPreset {
    Original,
    VeryHigh,
    High,
    Medium,
    Small,
    VerySmall,
    Minimal,
    None,
}

struct Hamming;

impl bk_tree::Metric<ImHash> for Hamming {
    fn distance(&self, a: &ImHash, b: &ImHash) -> u32 {
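        // Worked example (illustrative, not from the original sources): the distance is the
        // bit-level Hamming distance over the whole byte slice, so [1, 1, 1, 1, 1, 1, 1, 1]
        // vs [1, 1, 1, 1, 1, 1, 1, 2] differ in two bits (0b01 vs 0b10 in the last byte).
        // This is exactly what the `test_tolerance` test at the bottom of this file asserts.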
        hamming::distance_fast(a, b).expect("Calculating hamming distance, cannot fail") as u32
    }

    fn threshold_distance(&self, a: &ImHash, b: &ImHash, _threshold: u32) -> Option<u32> {
        Some(self.distance(a, b))
    }
}

pub struct SimilarImagesParameters {
    pub similarity: u32,
    pub hash_size: u8,
    pub hash_alg: HashAlg,
    pub image_filter: FilterType,
    pub exclude_images_with_same_size: bool,
    pub ignore_hard_links: bool,
}

impl SimilarImagesParameters {
    pub fn new(similarity: u32, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType, exclude_images_with_same_size: bool, ignore_hard_links: bool) -> Self {
        assert!([8, 16, 32, 64].contains(&hash_size));
        Self {
            similarity,
            hash_size,
            hash_alg,
            image_filter,
            exclude_images_with_same_size,
            ignore_hard_links,
        }
    }
}

pub struct SimilarImages {
    common_data: CommonToolData,
    information: Info,
    bktree: BKTree<ImHash, Hamming>,
    similar_vectors: Vec<Vec<ImagesEntry>>,
    similar_referenced_vectors: Vec<(ImagesEntry, Vec<ImagesEntry>)>,
    // Hashmap with image hashes and Vector with names of files
    image_hashes: HashMap<ImHash, Vec<ImagesEntry>>,
    images_to_check: BTreeMap<String, ImagesEntry>,
    params: SimilarImagesParameters,
}

#[derive(Default)]
pub struct Info {
    pub number_of_duplicates: usize,
    pub number_of_groups: u64,
}

impl SimilarImages {
    pub fn new(params: SimilarImagesParameters) -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::SimilarImages),
            information: Default::default(),
            bktree: BKTree::new(Hamming),
            similar_vectors: vec![],
            similar_referenced_vectors: vec![],
            params,
            images_to_check: Default::default(),
            image_hashes: Default::default(),
        }
    }

    #[fun_time(message = "find_similar_images", level = "info")]
    pub fn find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty();
        if !self.check_for_similar_images(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        if !self.hash_images(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        if !self.find_similar_hashes(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        self.delete_files();
        self.debug_print();
    }

    #[fun_time(message = "check_for_similar_images", level = "debug")]
    fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        if cfg!(feature = "heif") {
            self.common_data
                .extensions
                .set_and_validate_allowed_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, JXL_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat());
        } else {
            self.common_data
                .extensions
                .set_and_validate_allowed_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, JXL_IMAGE_EXTENSIONS].concat());
        }
        if !self.common_data.extensions.set_any_extensions() {
            return true;
        }
        let result = DirTraversalBuilder::new()
            .group_by(inode)
            .stop_receiver(stop_receiver)
            .progress_sender(progress_sender)
            .common_data(&self.common_data)
            .build()
            .run();
        match result {
            DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                self.images_to_check = grouped_file_entries
                    .into_iter()
                    .flat_map(if self.get_params().ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
                    .map(|fe| {
                        let fe_str = fe.path.to_string_lossy().to_string();
                        let image_entry = fe.into_images_entry();
                        (fe_str, image_entry)
                    })
                    .collect();
                self.common_data.text_messages.warnings.extend(warnings);
                debug!("check_files - Found {} image files.", self.images_to_check.len());
                true
            }
            DirTraversalResult::Stopped => false,
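            // Note on the traversal above (editorial, not from the original sources): grouping
            // by inode means hard-linked copies of one file arrive together; with
            // `ignore_hard_links` disabled, `take_1_per_inode` keeps a single entry per inode,
            // so a file is not reported as similar to its own hard links.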
} } #[fun_time(message = "hash_images_load_cache", level = "debug")] fn hash_images_load_cache(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { let loaded_hash_map; let mut records_already_cached: BTreeMap = Default::default(); let mut non_cached_files_to_check: BTreeMap = Default::default(); if self.common_data.use_cache { let (messages, loaded_items) = load_cache_from_file_generalized_by_path::( &get_similar_images_cache_file(&self.get_params().hash_size, &self.get_params().hash_alg, &self.get_params().image_filter), self.get_delete_outdated_cache(), &self.images_to_check, ); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); debug!("hash_images-load_cache - starting calculating diff"); extract_loaded_cache( &loaded_hash_map, mem::take(&mut self.images_to_check), &mut records_already_cached, &mut non_cached_files_to_check, ); debug!( "hash_images_load_cache - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached", non_cached_files_to_check.len(), format_size(non_cached_files_to_check.values().map(|e| e.size).sum::(), BINARY), records_already_cached.len(), format_size(records_already_cached.values().map(|e| e.size).sum::(), BINARY), ); } else { loaded_hash_map = Default::default(); mem::swap(&mut self.images_to_check, &mut non_cached_files_to_check); } (loaded_hash_map, records_already_cached, non_cached_files_to_check) } // Cache algorithm: // - Load data from file // - Remove from data to search, already loaded entries from cache(size and modified date must match) // - Check hash of files which doesn't have saved entry // - Join already read hashes with hashes which were read from file // - Join all hashes and save it to file #[fun_time(message = "hash_images", level = "debug")] fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { if self.images_to_check.is_empty() { return true; } let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.hash_images_load_cache(); let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common( progress_sender, CurrentStage::SimilarImagesCalculatingHashes, non_cached_files_to_check.len(), self.get_test_type(), ); debug!("hash_images - start hashing images"); let (mut vec_file_entry, errors): (Vec, Vec) = non_cached_files_to_check .into_par_iter() .map(|(_s, mut file_entry)| { atomic_counter.fetch_add(1, Ordering::Relaxed); if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } if let Err(e) = self.collect_image_file_entry(&mut file_entry) { return Some(Err(e)); } Some(Ok(file_entry)) }) .while_some() .partition_map(|res| match res { Ok(entry) => itertools::Either::Left(entry), Err(err) => itertools::Either::Right(err), }); self.common_data.text_messages.errors.extend(errors); debug!("hash_images - end hashing {} images", vec_file_entry.len()); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Just connect loaded results with already calculated hashes for file_entry in records_already_cached.into_values() { vec_file_entry.push(file_entry); } // All valid entries are used to create bktree used to check for hash similarity for file_entry in &vec_file_entry { // Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. 
contains a lot of alpha channel) if !(file_entry.hash.is_empty() || file_entry.hash.iter().all(|e| *e == 0) || file_entry.hash.iter().all(|e| *e == 255)) { self.image_hashes.entry(file_entry.hash.clone()).or_default().push(file_entry.clone()); } } self.save_to_cache(vec_file_entry, loaded_hash_map); // Break if stop was clicked after saving to cache if check_was_stopped.load(Ordering::Relaxed) { return false; } true } #[fun_time(message = "save_to_cache", level = "debug")] fn save_to_cache(&mut self, vec_file_entry: Vec, loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results let mut all_results: BTreeMap = loaded_hash_map; for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } let messages = save_cache_to_file_generalized( &get_similar_images_cache_file(&self.get_params().hash_size, &self.get_params().hash_alg, &self.get_params().image_filter), &all_results, self.common_data.save_also_as_json, 0, ); self.get_text_messages_mut().extend_with_another_messages(messages); } } fn collect_image_file_entry(&self, file_entry: &mut ImagesEntry) -> Result<(), String> { let img = get_dynamic_image_from_path(&file_entry.path.to_string_lossy())?; let dimensions = img.dimensions(); file_entry.width = dimensions.0; file_entry.height = dimensions.1; let hasher_config = HasherConfig::new() .hash_size(self.get_params().hash_size as u32, self.get_params().hash_size as u32) .hash_alg(self.get_params().hash_alg) .resize_filter(self.get_params().image_filter); let hasher = hasher_config.to_hasher(); let hash = hasher.hash_image(&img); file_entry.hash = hash.as_bytes().to_vec(); Ok(()) } // Split hashes at 2 parts, base hashes and hashes to compare, 3 argument is set of hashes with multiple images #[fun_time(message = "split_hashes", level = "debug")] fn split_hashes(&mut self, all_hashed_images: &HashMap>) -> (Vec, HashSet) { let hashes_with_multiple_images: HashSet = all_hashed_images .iter() .filter_map(|(hash, vec_file_entry)| { if vec_file_entry.len() >= 2 { return Some(hash.clone()); }; None }) .collect(); let mut base_hashes = Vec::new(); // Initial hashes if self.common_data.use_reference_folders { let mut files_from_referenced_folders: HashMap> = HashMap::new(); let mut normal_files: HashMap> = HashMap::new(); all_hashed_images.clone().into_iter().for_each(|(hash, vec_file_entry)| { for file_entry in vec_file_entry { if is_in_reference_folder(&self.common_data.directories.reference_directories, &file_entry.path) { files_from_referenced_folders.entry(hash.clone()).or_default().push(file_entry); } else { normal_files.entry(hash.clone()).or_default().push(file_entry); } } }); for hash in normal_files.into_keys() { self.bktree.add(hash); } for hash in files_from_referenced_folders.into_keys() { base_hashes.push(hash); } } else { for original_hash in all_hashed_images.keys() { self.bktree.add(original_hash.clone()); } base_hashes = all_hashed_images.keys().cloned().collect::>(); } (base_hashes, hashes_with_multiple_images) } #[fun_time(message = "collect_hash_compare_result", level = "debug")] fn collect_hash_compare_result( &self, hashes_parents: HashMap, hashes_with_multiple_images: &HashSet, all_hashed_images: &HashMap>, collected_similar_images: &mut HashMap>, hashes_similarity: HashMap, ) { // Collecting results to vector for (parent_hash, child_number) in hashes_parents { // If hash contains other hasher OR multiple images are available for checked 
hash if child_number > 0 || hashes_with_multiple_images.contains(&parent_hash) { let vec_fe = all_hashed_images[&parent_hash].clone(); collected_similar_images.insert(parent_hash.clone(), vec_fe); } } for (child_hash, (parent_hash, similarity)) in hashes_similarity { let mut vec_fe = all_hashed_images[&child_hash].clone(); for fe in &mut vec_fe { fe.similarity = similarity; } collected_similar_images .get_mut(&parent_hash) .expect("Cannot find parent hash - this should be added in previous step") .append(&mut vec_fe); } } #[fun_time(message = "compare_hashes_with_non_zero_tolerance", level = "debug")] fn compare_hashes_with_non_zero_tolerance( &mut self, all_hashed_images: &HashMap>, collected_similar_images: &mut HashMap>, progress_sender: Option<&Sender>, stop_receiver: Option<&Receiver<()>>, tolerance: u32, ) -> bool { // Don't use hashes with multiple images in bktree, because they will always be master of group and cannot be find by other hashes let (base_hashes, hashes_with_multiple_images) = self.split_hashes(all_hashed_images); let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common(progress_sender, CurrentStage::SimilarImagesComparingHashes, base_hashes.len(), self.get_test_type()); let mut hashes_parents: HashMap = Default::default(); // Hashes used as parent (hash, children_number_of_hash) let mut hashes_similarity: HashMap = Default::default(); // Hashes used as child, (parent_hash, similarity) // Check them in chunks, to decrease number of used memory // println!(); let base_hashes_chunks = base_hashes.chunks(1000); for chunk in base_hashes_chunks { let partial_results = chunk .into_par_iter() .map(|hash_to_check| { atomic_counter.fetch_add(1, Ordering::Relaxed); if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } let mut found_items = self .bktree .find(hash_to_check, tolerance) .filter(|(similarity, compared_hash)| { *similarity != 0 && !hashes_parents.contains_key(*compared_hash) && !hashes_with_multiple_images.contains(*compared_hash) }) .filter(|(similarity, compared_hash)| { if let Some((_, other_similarity_with_parent)) = hashes_similarity.get(*compared_hash) { // If current hash is more similar to other hash than to current parent hash, then skip check earlier // Because there is no way to be more similar to other hash than to current parent hash if *similarity >= *other_similarity_with_parent { return false; } } true }) .collect::>(); // Sort by tolerance found_items.sort_unstable_by_key(|f| f.0); Some((hash_to_check, found_items)) }) .while_some() .filter(|(original_hash, vec_similar_hashes)| !vec_similar_hashes.is_empty() || hashes_with_multiple_images.contains(*original_hash)) .collect::>(); // for (hash, vec) in &partial_results { // println!("{hash:?} --- {:?}", vec.iter().map(|e| e.1).collect::>()); // } if check_was_stopped.load(Ordering::Relaxed) { send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); return false; } self.connect_results(partial_results, &mut hashes_parents, &mut hashes_similarity, &hashes_with_multiple_images); } send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); debug_check_for_duplicated_things(self.common_data.use_reference_folders, &hashes_parents, &hashes_similarity, all_hashed_images, "LATTER"); self.collect_hash_compare_result(hashes_parents, &hashes_with_multiple_images, all_hashed_images, collected_similar_images, hashes_similarity); true } 
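    // Illustrative walk-through of the parent/child bookkeeping used above and in
    // `connect_results` below (editorial note, not from the original sources): with
    // tolerance 10 and hashes A, B, C where dist(A, B) = 3 and dist(C, B) = 2, B is first
    // recorded in `hashes_similarity` as a child of A; when the pair (C, B) is processed,
    // its smaller distance wins, so A's child counter in `hashes_parents` is decremented
    // and B is re-attached to C.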
#[fun_time(message = "connect_results", level = "debug")] fn connect_results( &self, partial_results: Vec<(&ImHash, Vec<(u32, &ImHash)>)>, hashes_parents: &mut HashMap, hashes_similarity: &mut HashMap, hashes_with_multiple_images: &HashSet, ) { for (original_hash, vec_compared_hashes) in partial_results { let mut number_of_added_child_items = 0; for (similarity, compared_hash) in vec_compared_hashes { // If hash is already in results skip it // This check duplicates check from bktree.find, but it is needed to because when iterating over elements, this structure can change if hashes_parents.contains_key(compared_hash) { continue; } // If there is already record, with smaller sensitivity, then replace it let mut need_to_add = false; let mut need_to_check = false; // TODO consider to replace variables from above with closures // If current checked hash, have parent, first we must check if similarity between them is lower than checked item if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(original_hash) { if *current_similarity_with_parent > similarity { need_to_check = true; *hashes_parents.get_mut(current_parent_hash).expect("Cannot find parent hash") -= 1; if hashes_parents.get(current_parent_hash) == Some(&0) && !hashes_with_multiple_images.contains(current_parent_hash) { hashes_parents.remove(current_parent_hash); } hashes_similarity .remove(original_hash) .expect("This should never fail, because we are iterating over this hash"); } } else { need_to_check = true; } if need_to_check { if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(compared_hash) { if *other_similarity > similarity { need_to_add = true; *hashes_parents.get_mut(other_parent_hash).expect("Cannot find parent hash") -= 1; } } // But when there is no record, just add it else { need_to_add = true; } } if need_to_add { hashes_similarity.insert(compared_hash.clone(), (original_hash.clone(), similarity)); number_of_added_child_items += 1; } } if number_of_added_child_items > 0 || hashes_with_multiple_images.contains(original_hash) { hashes_parents.insert((*original_hash).clone(), number_of_added_child_items); } } } #[fun_time(message = "find_similar_hashes", level = "debug")] fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { if self.image_hashes.is_empty() { return true; } let tolerance = self.get_params().similarity; // Results let mut collected_similar_images: HashMap> = Default::default(); let all_hashed_images = mem::take(&mut self.image_hashes); // Checking entries with tolerance 0 is really easy and fast, because only entries with same hashes needs to be checked if tolerance == 0 { for (hash, vec_file_entry) in all_hashed_images { if vec_file_entry.len() >= 2 { collected_similar_images.insert(hash, vec_file_entry); } } } else if !self.compare_hashes_with_non_zero_tolerance(&all_hashed_images, &mut collected_similar_images, progress_sender, stop_receiver, tolerance) { return false; } self.verify_duplicated_items(&collected_similar_images); // Info about hashes is not needed anymore, so we drop this info self.similar_vectors = collected_similar_images.into_values().collect(); self.exclude_items_with_same_size(); self.remove_multiple_records_from_reference_folders(); if self.common_data.use_reference_folders { for (_fe, vector) in &self.similar_referenced_vectors { self.information.number_of_duplicates += vector.len(); self.information.number_of_groups += 1; } } else { for vector in 
&self.similar_vectors { self.information.number_of_duplicates += vector.len() - 1; self.information.number_of_groups += 1; } } // Clean unused data to save ram self.image_hashes = Default::default(); self.images_to_check = Default::default(); self.bktree = BKTree::new(Hamming); true } #[fun_time(message = "exclude_items_with_same_size", level = "debug")] fn exclude_items_with_same_size(&mut self) { if self.get_params().exclude_images_with_same_size { for vec_file_entry in mem::take(&mut self.similar_vectors) { let mut bt_sizes: BTreeSet = Default::default(); let mut vec_values = Vec::new(); for file_entry in vec_file_entry { if !bt_sizes.contains(&file_entry.size) { bt_sizes.insert(file_entry.size); vec_values.push(file_entry); } } if vec_values.len() > 1 { self.similar_vectors.push(vec_values); } } } } #[fun_time(message = "remove_multiple_records_from_reference_folders", level = "debug")] fn remove_multiple_records_from_reference_folders(&mut self) { if self.common_data.use_reference_folders { self.similar_referenced_vectors = mem::take(&mut self.similar_vectors) .into_iter() .filter_map(|vec_file_entry| { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { None } else { files_from_referenced_folders.pop().map(|file| (file, normal_files)) } }) .collect::)>>(); } } #[allow(unused_variables)] // TODO this probably not works good when reference folders are used pub fn verify_duplicated_items(&self, collected_similar_images: &HashMap>) { if !cfg!(debug_assertions) { return; } // Validating if group contains duplicated results let mut result_hashset: HashSet = Default::default(); let mut found = false; for vec_file_entry in collected_similar_images.values() { if vec_file_entry.is_empty() { println!("Empty group"); found = true; continue; } if vec_file_entry.len() == 1 { println!("Single Element {vec_file_entry:?}"); found = true; continue; } for file_entry in vec_file_entry { let st = file_entry.path.to_string_lossy().to_string(); if result_hashset.contains(&st) { found = true; println!("Duplicated Element {st}"); } else { result_hashset.insert(st); } } } assert!(!found, "Found Invalid entries, verify errors before"); } fn delete_files(&mut self) { if self.common_data.delete_method == DeleteMethod::None { return; } let vec_files = self.similar_vectors.iter().collect::>(); delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } } fn is_in_reference_folder(reference_directories: &[PathBuf], path: &Path) -> bool { reference_directories.iter().any(|e| path.starts_with(e)) } impl DebugPrint for SimilarImages { fn debug_print(&self) { if !cfg!(debug_assertions) { return; } println!("---------------DEBUG PRINT---------------"); self.debug_print_common(); println!("-----------------------------------------"); } } impl PrintResults for SimilarImages { fn write_results(&self, writer: &mut T) -> std::io::Result<()> { if !self.similar_vectors.is_empty() { write!(writer, "{} images which have similar friends\n\n", self.similar_vectors.len())?; for struct_similar in &self.similar_vectors { writeln!(writer, "Found {} images which have similar friends", struct_similar.len())?; for file_entry in struct_similar { writeln!( writer, "\"{}\" - {}x{} - {} - {}", file_entry.path.to_string_lossy(), file_entry.width, file_entry.height, format_size(file_entry.size, BINARY), 
get_string_from_similarity(&file_entry.similarity, self.get_params().hash_size) )?; } writeln!(writer)?; } } else if !self.similar_referenced_vectors.is_empty() { writeln!(writer, "{} images which have similar friends\n\n", self.similar_referenced_vectors.len())?; for (file_entry, vec_file_entry) in &self.similar_referenced_vectors { writeln!(writer, "Found {} images which have similar friends", vec_file_entry.len())?; writeln!(writer)?; writeln!( writer, "\"{}\" - {}x{} - {} - {}", file_entry.path.to_string_lossy(), file_entry.width, file_entry.height, format_size(file_entry.size, BINARY), get_string_from_similarity(&file_entry.similarity, self.get_params().hash_size) )?; for file_entry in vec_file_entry { writeln!( writer, "{:?} - {}x{} - {} - {}", file_entry.path, file_entry.width, file_entry.height, format_size(file_entry.size, BINARY), get_string_from_similarity(&file_entry.similarity, self.get_params().hash_size) )?; } writeln!(writer)?; } } else { write!(writer, "Not found any similar images.")?; } Ok(()) } fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> { if self.get_use_reference() { self.save_results_to_file_as_json_internal(file_name, &self.similar_referenced_vectors, pretty_print) } else { self.save_results_to_file_as_json_internal(file_name, &self.similar_vectors, pretty_print) } } } pub fn get_string_from_similarity(similarity: &u32, hash_size: u8) -> String { let index_preset = match hash_size { 8 => 0, 16 => 1, 32 => 2, 64 => 3, _ => panic!("Invalid hash size {hash_size}"), }; if *similarity == 0 { flc!("core_similarity_original") } else if *similarity <= SIMILAR_VALUES[index_preset][0] { flc!("core_similarity_very_high") } else if *similarity <= SIMILAR_VALUES[index_preset][1] { flc!("core_similarity_high") } else if *similarity <= SIMILAR_VALUES[index_preset][2] { flc!("core_similarity_medium") } else if *similarity <= SIMILAR_VALUES[index_preset][3] { flc!("core_similarity_small") } else if *similarity <= SIMILAR_VALUES[index_preset][4] { flc!("core_similarity_very_small") } else if *similarity <= SIMILAR_VALUES[index_preset][5] { flc!("core_similarity_minimal") } else { panic!("Invalid similarity value {similarity} for hash size {hash_size} (index {index_preset})"); } } pub fn return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> u32 { let index_preset = match hash_size { 8 => 0, 16 => 1, 32 => 2, 64 => 3, _ => panic!(), }; match similarity_preset { SimilarityPreset::Original => 0, SimilarityPreset::VeryHigh => SIMILAR_VALUES[index_preset][0], SimilarityPreset::High => SIMILAR_VALUES[index_preset][1], SimilarityPreset::Medium => SIMILAR_VALUES[index_preset][2], SimilarityPreset::Small => SIMILAR_VALUES[index_preset][3], SimilarityPreset::VerySmall => SIMILAR_VALUES[index_preset][4], SimilarityPreset::Minimal => SIMILAR_VALUES[index_preset][5], SimilarityPreset::None => panic!(""), } } pub fn convert_filters_to_string(image_filter: &FilterType) -> String { match image_filter { FilterType::Lanczos3 => "Lanczos3", FilterType::Nearest => "Nearest", FilterType::Triangle => "Triangle", FilterType::Gaussian => "Gaussian", FilterType::CatmullRom => "CatmullRom", } .to_string() } pub fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String { match hash_alg { HashAlg::Mean => "Mean", HashAlg::Gradient => "Gradient", HashAlg::Blockhash => "Blockhash", HashAlg::VertGradient => "VertGradient", HashAlg::DoubleGradient => "DoubleGradient", HashAlg::Median => "Median", } .to_string() } pub 
fn test_image_conversion_speed() { let file_name: &str = "test.jpg"; let file_path = Path::new(file_name); match image::open(file_path) { Ok(img_open) => { for alg in [ HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean, HashAlg::Median, ] { for filter in [ FilterType::Lanczos3, FilterType::CatmullRom, FilterType::Gaussian, FilterType::Nearest, FilterType::Triangle, ] { for size in [8, 16, 32, 64] { let hasher_config = HasherConfig::new().hash_alg(alg).resize_filter(filter).hash_size(size, size); let start = SystemTime::now(); let hasher = hasher_config.to_hasher(); let _hash = hasher.hash_image(&img_open); let end = SystemTime::now(); println!( "{:?} us {:?} {:?} {}x{}", end.duration_since(start).expect("Used time backwards").as_micros(), alg, filter, size, size ); } } } } Err(e) => { println!( "Failed to open test file {}, reason {}", match file_path.canonicalize() { Ok(t) => t.to_string_lossy().to_string(), Err(_inspected) => file_name.to_string(), }, e ); } } } #[allow(dead_code)] #[allow(unreachable_code)] #[allow(unused_variables)] // Function to validate if after first check there are any duplicated entries // E.g. /a.jpg is used also as master and similar image which is forbidden, because may // cause accidentally delete more pictures that user wanted fn debug_check_for_duplicated_things( use_reference_folders: bool, hashes_parents: &HashMap, hashes_similarity: &HashMap, all_hashed_images: &HashMap>, numm: &str, ) { if !cfg!(debug_assertions) { return; } if use_reference_folders { return; } let mut found_broken_thing = false; let mut hashmap_hashes: HashSet<_> = Default::default(); let mut hashmap_names: HashSet<_> = Default::default(); for (hash, number_of_children) in hashes_parents { if *number_of_children > 0 { if hashmap_hashes.contains(hash) { println!("------1--HASH--{} {:?}", numm, all_hashed_images[hash]); found_broken_thing = true; } hashmap_hashes.insert((*hash).clone()); for i in &all_hashed_images[hash] { let name = i.path.to_string_lossy().to_string(); if hashmap_names.contains(&name) { println!("------1--NAME--{numm} {name:?}"); found_broken_thing = true; } hashmap_names.insert(name); } } } for hash in hashes_similarity.keys() { if hashmap_hashes.contains(hash) { println!("------2--HASH--{} {:?}", numm, all_hashed_images[hash]); found_broken_thing = true; } hashmap_hashes.insert((*hash).clone()); for i in &all_hashed_images[hash] { let name = i.path.to_string_lossy().to_string(); if hashmap_names.contains(&name) { println!("------2--NAME--{numm} {name:?}"); found_broken_thing = true; } hashmap_names.insert(name); } } assert!(!found_broken_thing); } impl CommonData for SimilarImages { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } } impl SimilarImages { pub fn get_params(&self) -> &SimilarImagesParameters { &self.params } pub const fn get_similar_images(&self) -> &Vec> { &self.similar_vectors } pub fn get_similar_images_referenced(&self) -> &Vec<(ImagesEntry, Vec)> { &self.similar_referenced_vectors } pub fn get_use_reference(&self) -> bool { self.common_data.use_reference_folders } pub const fn get_information(&self) -> &Info { &self.information } } #[cfg(test)] mod tests { use std::collections::HashMap; use std::path::PathBuf; use bk_tree::BKTree; use image::imageops::FilterType; use image_hasher::HashAlg; use crate::common_tool::CommonData; use crate::similar_images::{Hamming, ImHash, ImagesEntry, SimilarImages, 
SimilarImagesParameters}; fn get_default_parameters() -> SimilarImagesParameters { SimilarImagesParameters { hash_alg: HashAlg::Gradient, hash_size: 8, similarity: 0, image_filter: FilterType::Lanczos3, exclude_images_with_same_size: false, ignore_hard_links: false, } } #[test] fn test_compare_no_images() { for _ in 0..100 { let mut similar_images = SimilarImages::new(get_default_parameters()); similar_images.find_similar_images(None, None); assert_eq!(similar_images.get_similar_images().len(), 0); } } #[test] fn test_compare_tolerance_0_normal_mode() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 0; let mut similar_images = SimilarImages::new(parameters); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "cde.txt"); let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "rrt.txt"); let fe5 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "bld.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1.clone(), fe2.clone(), fe3.clone(), fe4.clone(), fe5.clone()]); similar_images.find_similar_hashes(None, None); assert_eq!(similar_images.get_similar_images().len(), 2); let first_group = similar_images.get_similar_images()[0].iter().map(|e| &e.path).collect::>(); let second_group = similar_images.get_similar_images()[1].iter().map(|e| &e.path).collect::>(); // Initial order is not guaranteed, so we need to check both options if similar_images.get_similar_images()[0][0].hash == fe1.hash { assert_eq!(first_group, vec![&fe1.path, &fe2.path]); assert_eq!(second_group, vec![&fe3.path, &fe4.path, &fe5.path]); } else { assert_eq!(first_group, vec![&fe3.path, &fe4.path, &fe5.path]); assert_eq!(second_group, vec![&fe1.path, &fe2.path]); } } } #[test] fn test_simple_normal_one_group() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); assert_eq!(similar_images.get_similar_images().len(), 1); } } #[test] fn test_simple_normal_one_group_extended() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 2; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(false); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "rrd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2, fe3]); similar_images.find_similar_hashes(None, None); assert_eq!(similar_images.get_similar_images().len(), 1); assert_eq!(similar_images.get_similar_images()[0].len(), 3); } } #[test] fn test_simple_referenced_same_group() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 0; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = 
create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); assert_eq!(similar_images.get_similar_images().len(), 0); } } #[test] fn test_simple_referenced_group_extended() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 0; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); assert_eq!(similar_images.get_similar_images_referenced().len(), 1); assert_eq!(similar_images.get_similar_images_referenced()[0].1.len(), 1); } } #[test] fn test_simple_referenced_group_extended2() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 0; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc2.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd.txt"); let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd2.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2, fe3, fe4]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 1); assert_eq!(res[0].1.len(), 2); assert!(res[0].1.iter().all(|e| e.path.starts_with("/home/kk/"))); } } #[test] fn test_simple_normal_too_small_similarity() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(false); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b00001], "abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b00100], "bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b10000], "rrd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2, fe3]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images(); assert!(res.is_empty()); } } #[test] fn test_simple_normal_union_of_similarity() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 4; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(false); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0000_0001], "abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0000_1111], "bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0111_1111], "rrd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2, fe3]); similar_images.find_similar_hashes(None, 
None); let res = similar_images.get_similar_images(); assert_eq!(res.len(), 1); let mut path = res[0].iter().map(|e| e.path.to_string_lossy().to_string()).collect::>(); path.sort(); if res[0].len() == 3 { assert_eq!(path, vec!["abc.txt".to_string(), "bcd.txt".to_string(), "rrd.txt".to_string()]); } else if res[0].len() == 2 { assert!(path == vec!["abc.txt".to_string(), "bcd.txt".to_string()] || path == vec!["bcd.txt".to_string(), "rrd.txt".to_string()]); } else { panic!("Invalid number of items"); } } } #[test] fn test_reference_similarity_only_one() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0001], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0011], "/home/kk/bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 1); assert_eq!(res[0].1.len(), 1); assert_eq!(res[0].0.path, PathBuf::from("/home/rr/abc.txt")); assert_eq!(res[0].1[0].path, PathBuf::from("/home/kk/bcd.txt")); } } #[test] fn test_reference_too_small_similarity() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0001], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0010], "/home/kk/bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 0); } } #[test] fn test_reference_minimal() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0001], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0011], "/home/kk/bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0100], "/home/kk/bcd2.txt"); let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b1100], "/home/rr/krkr.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2, fe3, fe4]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 2); assert_eq!(res[0].1.len(), 1); assert_eq!(res[1].1.len(), 1); if res[0].1[0].path == PathBuf::from("/home/kk/bcd.txt") { assert_eq!(res[0].0.path, PathBuf::from("/home/rr/abc.txt")); assert_eq!(res[1].0.path, PathBuf::from("/home/rr/krkr.txt")); } else if res[0].1[0].path == PathBuf::from("/home/kk/bcd2.txt") { assert_eq!(res[0].0.path, 
PathBuf::from("/home/rr/krkr.txt")); assert_eq!(res[1].0.path, PathBuf::from("/home/rr/abc.txt")); } } } #[test] fn test_reference_same() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 1; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe1, fe2]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 1); assert_eq!(res[0].1.len(), 1); } } #[test] fn test_reference_union() { for _ in 0..100 { let mut parameters = get_default_parameters(); parameters.similarity = 10; let mut similar_images = SimilarImages::new(parameters); similar_images.set_use_reference_folders(true); // Not using special method, because it validates if path exists similar_images.common_data.directories.reference_directories = vec![PathBuf::from("/home/rr/")]; let fe0 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b1000], "/home/rr/abc2.txt"); let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0001], "/home/rr/abc.txt"); let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b1110], "/home/kk/bcd.txt"); let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0100], "/home/kk/bcd2.txt"); let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b1100], "/home/rr/krkr.txt"); add_hashes(&mut similar_images.image_hashes, vec![fe0, fe1, fe2, fe3, fe4]); similar_images.find_similar_hashes(None, None); let res = similar_images.get_similar_images_referenced(); assert_eq!(res.len(), 1); assert_eq!(res[0].1.len(), 2); assert_eq!(res[0].0.path, PathBuf::from("/home/rr/krkr.txt")); } } #[test] fn test_tolerance() { // This test not really tests anything, but shows that current hamming distance works // in bits instead of bytes // I tried to make it work in bytes, but it was terrible, so Hamming should be really Ok let fe1 = vec![1, 1, 1, 1, 1, 1, 1, 1]; let fe2 = vec![1, 1, 1, 1, 1, 1, 1, 2]; let mut bktree = BKTree::new(Hamming); bktree.add(fe1); let (similarity, _hash) = bktree.find(&fe2, 100).next().expect("No similar images found"); assert_eq!(similarity, 2); let fe1 = vec![1, 1, 1, 1, 1, 1, 1, 1]; let fe2 = vec![1, 1, 1, 1, 1, 1, 1, 3]; let mut bktree = BKTree::new(Hamming); bktree.add(fe1); let (similarity, _hash) = bktree.find(&fe2, 100).next().expect("No similar images found"); assert_eq!(similarity, 1); let fe1 = vec![1, 1, 1, 1, 1, 1, 1, 0b0000_0000]; let fe2 = vec![1, 1, 1, 1, 1, 1, 1, 0b0000_1000]; let mut bktree = BKTree::new(Hamming); bktree.add(fe1); let (similarity, _hash) = bktree.find(&fe2, 100).next().expect("No similar images found"); assert_eq!(similarity, 1); } fn add_hashes(hashmap: &mut HashMap>, file_entries: Vec) { for fe in file_entries { hashmap.entry(fe.hash.clone()).or_default().push(fe); } } fn create_random_file_entry(hash: Vec, name: &str) -> ImagesEntry { ImagesEntry { path: PathBuf::from(name.to_string()), size: 0, width: 100, height: 100, modified_date: 0, hash, similarity: 0, } } } czkawka_core-8.0.0/src/similar_videos.rs000064400000000000000000000424601046102023000164500ustar 00000000000000use 
std::collections::{BTreeMap, BTreeSet, HashMap}; use std::io::Write; use std::mem; use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; use crossbeam_channel::{Receiver, Sender}; use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound; use fun_time::fun_time; use humansize::{format_size, BINARY}; use log::debug; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo; use vid_dup_finder_lib::{NormalizedTolerance, VideoHash}; use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS}; use crate::common_cache::{extract_loaded_cache, get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_dir_traversal::{inode, take_1_per_inode, DirTraversalBuilder, DirTraversalResult, FileEntry, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::flc; use crate::progress_data::{CurrentStage, ProgressData}; pub const MAX_TOLERANCE: i32 = 20; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct VideosEntry { pub path: PathBuf, pub size: u64, pub modified_date: u64, pub vhash: VideoHash, pub error: String, } impl ResultEntry for VideosEntry { fn get_path(&self) -> &Path { &self.path } fn get_modified_date(&self) -> u64 { self.modified_date } fn get_size(&self) -> u64 { self.size } } impl FileEntry { fn into_videos_entry(self) -> VideosEntry { VideosEntry { size: self.size, path: self.path, modified_date: self.modified_date, vhash: Default::default(), error: String::new(), } } } pub struct SimilarVideosParameters { pub tolerance: i32, pub exclude_videos_with_same_size: bool, pub ignore_hard_links: bool, } impl SimilarVideosParameters { pub fn new(tolerance: i32, exclude_videos_with_same_size: bool, ignore_hard_links: bool) -> Self { assert!((0..=MAX_TOLERANCE).contains(&tolerance)); Self { tolerance, exclude_videos_with_same_size, ignore_hard_links, } } } pub struct SimilarVideos { common_data: CommonToolData, information: Info, similar_vectors: Vec>, similar_referenced_vectors: Vec<(VideosEntry, Vec)>, videos_hashes: BTreeMap, Vec>, videos_to_check: BTreeMap, params: SimilarVideosParameters, } impl CommonData for SimilarVideos { fn get_cd(&self) -> &CommonToolData { &self.common_data } fn get_cd_mut(&mut self) -> &mut CommonToolData { &mut self.common_data } } #[derive(Default)] pub struct Info { pub number_of_duplicates: usize, pub number_of_groups: u64, } impl SimilarVideos { pub fn new(params: SimilarVideosParameters) -> Self { Self { common_data: CommonToolData::new(ToolType::SimilarVideos), information: Default::default(), similar_vectors: vec![], videos_hashes: Default::default(), videos_to_check: Default::default(), similar_referenced_vectors: vec![], params, } } #[fun_time(message = "find_similar_videos", level = "info")] pub fn find_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) { if !check_if_ffmpeg_is_installed() { self.common_data.text_messages.errors.push(flc!("core_ffmpeg_not_found")); #[cfg(target_os = "windows")] self.common_data.text_messages.errors.push(flc!("core_ffmpeg_not_found_windows")); #[cfg(target_os = "linux")] self.common_data .text_messages .errors .push(flc!("core_ffmpeg_missing_in_snap", url = "https://github.com/snapcrafters/ffmpeg/issues/73")); } else { 
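            // Happy path (editorial note): a mirror of the similar-images tool - first collect
            // candidate video files, then hash and group them; either stage can be interrupted
            // through `stop_receiver`, which sets `stopped_search`.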
self.prepare_items(); self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty(); if !self.check_for_similar_videos(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } if !self.sort_videos(stop_receiver, progress_sender) { self.common_data.stopped_search = true; return; } } self.delete_files(); self.debug_print(); } #[fun_time(message = "check_for_similar_videos", level = "debug")] fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { self.common_data.extensions.set_and_validate_allowed_extensions(VIDEO_FILES_EXTENSIONS); if !self.common_data.extensions.set_any_extensions() { return true; } let result = DirTraversalBuilder::new() .group_by(inode) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .common_data(&self.common_data) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.videos_to_check = grouped_file_entries .into_iter() .flat_map(if self.get_params().ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode }) .map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry())) .collect(); self.common_data.text_messages.warnings.extend(warnings); debug!("check_files - Found {} video files.", self.videos_to_check.len()); true } DirTraversalResult::Stopped => false, } } #[fun_time(message = "load_cache_at_start", level = "debug")] fn load_cache_at_start(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { let loaded_hash_map; let mut records_already_cached: BTreeMap = Default::default(); let mut non_cached_files_to_check: BTreeMap = Default::default(); if self.common_data.use_cache { let (messages, loaded_items) = load_cache_from_file_generalized_by_path::(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); extract_loaded_cache( &loaded_hash_map, mem::take(&mut self.videos_to_check), &mut records_already_cached, &mut non_cached_files_to_check, ); } else { loaded_hash_map = Default::default(); mem::swap(&mut self.videos_to_check, &mut non_cached_files_to_check); } (loaded_hash_map, records_already_cached, non_cached_files_to_check) } #[fun_time(message = "sort_videos", level = "debug")] fn sort_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { if self.videos_to_check.is_empty() { return true; } let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache_at_start(); let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common( progress_sender, CurrentStage::SimilarVideosCalculatingHashes, non_cached_files_to_check.len(), self.get_test_type(), ); let mut vec_file_entry: Vec = non_cached_files_to_check .par_iter() .map(|file_entry| { atomic_counter.fetch_add(1, Ordering::Relaxed); if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } let mut file_entry = file_entry.1.clone(); let vhash = match VideoHash::from_path(&file_entry.path) { Ok(t) => t, Err(e) => { return { file_entry.error = format!("Failed to hash file, reason {e}"); Some(file_entry) }; } }; file_entry.vhash = vhash; Some(file_entry) }) .while_some() .collect::>(); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Just 
connect loaded results with already calculated hashes vec_file_entry.extend(records_already_cached.into_values()); let mut hashmap_with_file_entries: HashMap = Default::default(); let mut vector_of_hashes: Vec = Vec::new(); for file_entry in &vec_file_entry { // 0 means that images was not hashed correctly, e.g. could be improperly if file_entry.error.is_empty() { hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone()); vector_of_hashes.push(file_entry.vhash.clone()); } else { self.common_data.text_messages.warnings.push(file_entry.error.clone()); } } self.save_cache(vec_file_entry, loaded_hash_map); // Break if stop was clicked after saving to cache if check_was_stopped.load(Ordering::Relaxed) { return false; } self.match_groups_of_videos(vector_of_hashes, &hashmap_with_file_entries); self.remove_from_reference_folders(); if self.common_data.use_reference_folders { for (_fe, vector) in &self.similar_referenced_vectors { self.information.number_of_duplicates += vector.len(); self.information.number_of_groups += 1; } } else { for vector in &self.similar_vectors { self.information.number_of_duplicates += vector.len() - 1; self.information.number_of_groups += 1; } } // Clean unused data self.videos_hashes = Default::default(); self.videos_to_check = Default::default(); true } #[fun_time(message = "save_cache", level = "debug")] fn save_cache(&mut self, vec_file_entry: Vec, loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results let mut all_results: BTreeMap = loaded_hash_map; for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } let messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &all_results, self.common_data.save_also_as_json, 0); self.get_text_messages_mut().extend_with_another_messages(messages); } } #[fun_time(message = "match_groups_of_videos", level = "debug")] fn match_groups_of_videos(&mut self, vector_of_hashes: Vec, hashmap_with_file_entries: &HashMap) { let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.get_params().tolerance as f64 / 100.0f64)); let mut collected_similar_videos: Vec> = Default::default(); for i in match_group { let mut temp_vector: Vec = Vec::new(); let mut bt_size: BTreeSet = Default::default(); for j in i.duplicates() { let file_entry = &hashmap_with_file_entries[&j.to_string_lossy().to_string()]; if self.get_params().exclude_videos_with_same_size { if !bt_size.contains(&file_entry.size) { bt_size.insert(file_entry.size); temp_vector.push(file_entry.clone()); } } else { temp_vector.push(file_entry.clone()); } } if temp_vector.len() > 1 { collected_similar_videos.push(temp_vector); } } self.similar_vectors = collected_similar_videos; } #[fun_time(message = "remove_from_reference_folders", level = "debug")] fn remove_from_reference_folders(&mut self) { if self.common_data.use_reference_folders { self.similar_referenced_vectors = mem::take(&mut self.similar_vectors) .into_iter() .filter_map(|vec_file_entry| { let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry .into_iter() .partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path())); if normal_files.is_empty() { None } else { files_from_referenced_folders.pop().map(|file| (file, normal_files)) } }) .collect::)>>(); } } fn delete_files(&mut self) { if 
    fn delete_files(&mut self) {
        if self.common_data.delete_method == DeleteMethod::None {
            return;
        }

        let vec_files = self.similar_vectors.iter().collect::<Vec<_>>();
        delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
    }
}

impl DebugPrint for SimilarVideos {
    #[fun_time(message = "debug_print", level = "debug")]
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }

        println!("---------------DEBUG PRINT---------------");
        println!("Included directories - {:?}", self.common_data.directories.included_directories);
        self.debug_print_common();
        println!("-----------------------------------------");
    }
}

impl PrintResults for SimilarVideos {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        if !self.similar_vectors.is_empty() {
            write!(writer, "{} groups of videos which have similar friends\n\n", self.similar_vectors.len())?;

            for struct_similar in &self.similar_vectors {
                writeln!(writer, "Found {} videos which have similar friends", struct_similar.len())?;
                for file_entry in struct_similar {
                    writeln!(writer, "\"{}\" - {}", file_entry.path.to_string_lossy(), format_size(file_entry.size, BINARY))?;
                }
                writeln!(writer)?;
            }
        } else if !self.similar_referenced_vectors.is_empty() {
            write!(writer, "{} groups of videos which have similar friends\n\n", self.similar_referenced_vectors.len())?;

            for (fe, struct_similar) in &self.similar_referenced_vectors {
                writeln!(writer, "Found {} videos which have similar friends", struct_similar.len())?;
                writeln!(writer)?;
                writeln!(writer, "\"{}\" - {}", fe.path.to_string_lossy(), format_size(fe.size, BINARY))?;
                for file_entry in struct_similar {
                    writeln!(writer, "\"{}\" - {}", file_entry.path.to_string_lossy(), format_size(file_entry.size, BINARY))?;
                }
                writeln!(writer)?;
            }
        } else {
            write!(writer, "No similar videos found.")?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        if self.get_use_reference() {
            self.save_results_to_file_as_json_internal(file_name, &self.similar_referenced_vectors, pretty_print)
        } else {
            self.save_results_to_file_as_json_internal(file_name, &self.similar_vectors, pretty_print)
        }
    }
}

pub fn check_if_ffmpeg_is_installed() -> bool {
    // Hashing a name that cannot be a valid video forces an error; only the
    // FfmpegNotFound variant indicates that ffmpeg itself is missing.
    let vid = "9999czekoczekoczekolada999.txt";
    if let Err(DetermineVideo {
        src_path: _a,
        error: FfmpegNotFound,
    }) = VideoHash::from_path(vid)
    {
        return false;
    }
    true
}

impl SimilarVideos {
    pub fn get_params(&self) -> &SimilarVideosParameters {
        &self.params
    }

    pub const fn get_similar_videos(&self) -> &Vec<Vec<VideosEntry>> {
        &self.similar_vectors
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }

    pub fn get_similar_videos_referenced(&self) -> &Vec<(VideosEntry, Vec<VideosEntry>)> {
        &self.similar_referenced_vectors
    }

    pub fn get_number_of_base_duplicated_files(&self) -> usize {
        if self.common_data.use_reference_folders {
            self.similar_referenced_vectors.len()
        } else {
            self.similar_vectors.len()
        }
    }

    pub fn get_use_reference(&self) -> bool {
        self.common_data.use_reference_folders
    }
}
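// A minimal test sketch for the probe above, assuming nothing about the test
// environment: the result depends on whether ffmpeg is on PATH, so both
// outcomes are accepted and only absence of panics is verified.
#[cfg(test)]
mod ffmpeg_probe_tests {
    use super::check_if_ffmpeg_is_installed;

    #[test]
    fn probe_returns_without_panicking() {
        // Environment-dependent result; both `true` and `false` are valid here.
        let _ = check_if_ffmpeg_is_installed();
    }
}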
czkawka_core-8.0.0/src/temporary.rs000064400000000000000000000206621046102023000154610ustar 00000000000000use std::fs;
use std::fs::DirEntry;
use std::io::prelude::*;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use rayon::prelude::*;
use serde::Serialize;

use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_dir_traversal::{common_read_dir, get_modified_time, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
use crate::progress_data::{CurrentStage, ProgressData};

const TEMP_EXTENSIONS: &[&str] = &[
    "#",
    "thumbs.db",
    ".bak",
    "~",
    ".tmp",
    ".temp",
    ".ds_store",
    ".crdownload",
    ".part",
    ".cache",
    ".dmp",
    ".download",
    ".partial",
];

#[derive(Clone, Serialize, Debug)]
pub struct TemporaryFileEntry {
    pub path: PathBuf,
    pub modified_date: u64,
}

impl TemporaryFileEntry {
    pub fn get_path(&self) -> &PathBuf {
        &self.path
    }

    pub fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
}

#[derive(Default)]
pub struct Info {
    pub number_of_temporary_files: usize,
}

pub struct Temporary {
    common_data: CommonToolData,
    information: Info,
    temporary_files: Vec<TemporaryFileEntry>,
}

impl Temporary {
    pub fn new() -> Self {
        Self {
            common_data: CommonToolData::new(ToolType::TemporaryFiles),
            information: Info::default(),
            temporary_files: vec![],
        }
    }

    #[fun_time(message = "find_temporary_files", level = "info")]
    pub fn find_temporary_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
        self.prepare_items();
        if !self.check_files(stop_receiver, progress_sender) {
            self.common_data.stopped_search = true;
            return;
        }
        self.delete_files();
        self.debug_print();
    }

    #[fun_time(message = "check_files", level = "debug")]
    fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
        let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();

        let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
            prepare_thread_handler_common(progress_sender, CurrentStage::CollectingFiles, 0, self.get_test_type());
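        // The traversal below is breadth-first: every pass of the outer loop scans all
        // directories of the current depth in parallel with rayon, and the subdirectories
        // found become the work list for the next pass.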
        while !folders_to_check.is_empty() {
            if check_if_stop_received(stop_receiver) {
                send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
                return false;
            }

            let segments: Vec<_> = folders_to_check
                .into_par_iter()
                .map(|current_folder| {
                    let mut dir_result = vec![];
                    let mut warnings = vec![];
                    let mut fe_result = vec![];

                    let Some(read_dir) = common_read_dir(&current_folder, &mut warnings) else {
                        return (dir_result, warnings, fe_result);
                    };

                    // Check every sub folder/file/link etc.
                    for entry in read_dir {
                        let Ok(entry_data) = entry else {
                            continue;
                        };
                        let Ok(file_type) = entry_data.file_type() else {
                            continue;
                        };

                        if file_type.is_dir() {
                            check_folder_children(
                                &mut dir_result,
                                &mut warnings,
                                &entry_data,
                                self.common_data.recursive_search,
                                &self.common_data.directories,
                                &self.common_data.excluded_items,
                            );
                        } else if file_type.is_file() {
                            if let Some(file_entry) = self.get_file_entry(&atomic_counter, &entry_data, &mut warnings) {
                                fe_result.push(file_entry);
                            }
                        }
                    }

                    (dir_result, warnings, fe_result)
                })
                .collect();

            let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
            folders_to_check = Vec::with_capacity(required_size);

            // Process collected data
            for (segment, warnings, fe_result) in segments {
                folders_to_check.extend(segment);
                self.common_data.text_messages.warnings.extend(warnings);
                for fe in fe_result {
                    self.temporary_files.push(fe);
                }
            }
        }

        send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);

        self.information.number_of_temporary_files = self.temporary_files.len();

        true
    }

    pub fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<TemporaryFileEntry> {
        atomic_counter.fetch_add(1, Ordering::Relaxed);

        let current_file_name = entry_data.path();
        if self.common_data.excluded_items.is_excluded(&current_file_name) {
            return None;
        }

        let file_name = entry_data.file_name();
        let file_name_ascii_lowercase = file_name.to_ascii_lowercase();
        let file_name_lowercase = file_name_ascii_lowercase.to_string_lossy();
        if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) {
            return None;
        }

        let Ok(metadata) = entry_data.metadata() else {
            return None;
        };

        // Creating new file entry
        Some(TemporaryFileEntry {
            modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
            path: current_file_name,
        })
    }

    #[fun_time(message = "delete_files", level = "debug")]
    fn delete_files(&mut self) {
        match self.common_data.delete_method {
            DeleteMethod::Delete => {
                let mut warnings = Vec::new();
                for file_entry in &self.temporary_files {
                    if fs::remove_file(file_entry.path.clone()).is_err() {
                        warnings.push(file_entry.path.to_string_lossy().to_string());
                    }
                }
                self.common_data.text_messages.warnings.extend(warnings);
            }
            DeleteMethod::None => {
                // Just do nothing
            }
            _ => unreachable!(),
        }
    }
}

impl PrintResults for Temporary {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        writeln!(
            writer,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.common_data.directories.included_directories,
            self.common_data.directories.excluded_directories,
            self.common_data.excluded_items.get_excluded_items()
        )?;
        writeln!(writer, "Found {} temporary files.\n", self.information.number_of_temporary_files)?;

        for file_entry in &self.temporary_files {
            writeln!(writer, "\"{}\"", file_entry.path.to_string_lossy())?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        self.save_results_to_file_as_json_internal(file_name, &self.temporary_files, pretty_print)
    }
}

impl Default for Temporary {
    fn default() -> Self {
        Self::new()
    }
}

impl DebugPrint for Temporary {
    fn debug_print(&self) {
        if !cfg!(debug_assertions) {
            return;
        }

        println!("### Information");
        println!("Temporary list size - {}", self.temporary_files.len());
        self.debug_print_common();
    }
}

impl CommonData for Temporary {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
}
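// A minimal usage sketch of this tool, assuming directories are configured via
// the CommonData accessors; `set_included_directory` is an assumed setter name,
// shown for illustration only:
//
//     let mut finder = Temporary::new();
//     finder.set_included_directory(vec![PathBuf::from("/path/to/scan")]);
//     finder.find_temporary_files(None, None);
//     for entry in finder.get_temporary_files() {
//         println!("{}", entry.get_path().to_string_lossy());
//     }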
impl Temporary {
    pub const fn get_temporary_files(&self) -> &Vec<TemporaryFileEntry> {
        &self.temporary_files
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }
}
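// A small sanity-test sketch for the suffix matching used by `get_file_entry`:
// TEMP_EXTENSIONS mixes bare suffixes ("~", "#"), extensions (".tmp") and full
// file names ("thumbs.db"), all compared with `ends_with` against the
// lowercased file name. The sample names below are hypothetical.
#[cfg(test)]
mod temp_extension_tests {
    use super::TEMP_EXTENSIONS;

    #[test]
    fn suffix_matching_covers_names_and_extensions() {
        for name in ["Thumbs.db", "report.TMP", "backup~", "movie.crdownload"] {
            let lower = name.to_ascii_lowercase();
            assert!(TEMP_EXTENSIONS.iter().any(|f| lower.ends_with(f)), "{name} should match");
        }
    }
}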