infer-0.19.0/.cargo_vcs_info.json0000644000000001360000000000100122260ustar { "git": { "sha1": "4256ea07165a9393186b6c190541fac4b5e6f9f5" }, "path_in_vcs": "" }infer-0.19.0/.chglog/CHANGELOG.tpl.md000064400000000000000000000013371046102023000147520ustar 00000000000000## Changelog {{ range .Versions }} ## {{ if .Tag.Previous }}{{ .Tag.Name }}{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }} {{ if .CommitGroups -}} {{ range .CommitGroups -}} ### {{ .Title }} {{ range .Commits -}} - {{.Hash.Short}} {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ if .Subject }}{{ .Subject }}{{ else }}{{ .Header }}{{ end }} {{ end }} {{ end -}} ### Commits {{ range .Commits -}} - {{.Hash.Short}} {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ if .Subject }}{{ .Subject }}{{ else }}{{ .Header }}{{ end }} {{ end }} {{ end -}} {{- if .NoteGroups -}} {{ range .NoteGroups -}} ### {{ .Title }} {{ range .Notes }} {{ .Body }} {{ end }} {{ end -}} {{ end -}} {{ end -}}infer-0.19.0/.chglog/config.yml000064400000000000000000000010151046102023000143240ustar 00000000000000style: github template: CHANGELOG.tpl.md info: title: CHANGELOG repository_url: https://github.com/bojand/infer options: commits: filters: Type: - docs - enhance - feat - fix - build commit_groups: title_maps: docs: Documentation enhance: Enhancements feat: Features fix: Bug Fixes build: Build header: pattern: "^(\\w*)?\\:\\s(.*)$" pattern_maps: - Type - Subject notes: keywords: - BREAKING CHANGEinfer-0.19.0/.github/workflows/build.yml000064400000000000000000000021311046102023000162320ustar 00000000000000name: build on: push: branches: - master paths: - 'Cargo*' - 'src/**' - 'tests/**' - 'testdata/**' pull_request: branches: - master paths: - 'Cargo*' - 'src/**' - 'tests/**' - 'testdata/**' jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Install rustfmt run: rustup component add rustfmt - name: Install clippy run: rustup component add clippy - name: Check format run: cargo fmt --all 
-- --check - name: Lint (all features) run: cargo clippy --all-targets --all-features --examples -- -D clippy::all - name: Lint (no_std) run: cargo clippy --all-targets --no-default-features --features alloc -- -D clippy::all - name: Lint (no_alloc) run: cargo clippy --all-targets --no-default-features -- -D clippy::all - name: Test (default) run: cargo test --verbose - name: Test (no_std) run: cargo test --no-default-features --features alloc --verbose - name: Test (no_alloc) run: cargo test --no-default-features --verbose infer-0.19.0/.github/workflows/release.yml000064400000000000000000000041221046102023000165550ustar 00000000000000name: release on: push: tags: - v* jobs: github_build: name: GitHub Build runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v1 - name: Install rustfmt run: rustup component add rustfmt - name: Install clippy run: rustup component add clippy - name: Check format run: cargo fmt --all -- --check - name: Lint (all features) run: cargo clippy --all-targets --all-features -- -D clippy::all - name: Lint (no_std) run: cargo clippy --all-targets --no-default-features --features alloc -- -D clippy::all - name: Lint (no_alloc) run: cargo clippy --all-targets --no-default-features -- -D clippy::all - name: Test (default) run: cargo test --verbose - name: Test (no_std) run: cargo test --no-default-features --features alloc --verbose - name: Test (no_alloc) run: cargo test --no-default-features --verbose github_release: name: Create GitHub Release needs: github_build runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 with: fetch-depth: 0 - name: Generate release notes run: | export PATH=$PATH:$(go env GOPATH)/bin go install github.com/git-chglog/git-chglog/cmd/git-chglog@latest git-chglog -c .chglog/config.yml $(git describe --tags) > RELEASE.md - name: Create GitHub release ${{ matrix.target }} uses: softprops/action-gh-release@v1 with: body_path: RELEASE.md env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # 
Publish to Crates.io cargo_publish: name: Publish Cargo Package needs: github_build runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - run: cargo login $CRATES_IO_TOKEN - run: cargo publish env: CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}infer-0.19.0/.gitignore000064400000000000000000000005001046102023000130010ustar 00000000000000# Generated by Cargo # will have compiled files and executables /target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk infer-0.19.0/Cargo.lock0000644000000017270000000000100102100ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cfb" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" dependencies = [ "byteorder", "fnv", "uuid", ] [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "infer" version = "0.19.0" dependencies = [ "cfb", ] [[package]] name = "uuid" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" infer-0.19.0/Cargo.toml0000644000000024420000000000100102260ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # 
"normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "infer" version = "0.19.0" authors = ["Bojan "] build = false exclude = [ "/testdata", "/tests", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Small crate to infer file type based on magic number signatures" homepage = "https://github.com/bojand/infer" documentation = "https://docs.rs/infer" readme = "README.md" keywords = [ "magic-number", "filetype", "mime", "mime-types", "no_std", ] license = "MIT" repository = "https://github.com/bojand/infer" [lib] name = "infer" path = "src/lib.rs" [[example]] name = "file" path = "examples/file.rs" required-features = ["std"] [dependencies.cfb] version = "0.7.0" optional = true [features] alloc = [] default = ["std"] std = [ "alloc", "cfb", ] infer-0.19.0/Cargo.toml.orig000064400000000000000000000012251046102023000137050ustar 00000000000000[package] name = "infer" version = "0.19.0" authors = ["Bojan "] edition = "2018" description = "Small crate to infer file type based on magic number signatures" license = "MIT" keywords = ["magic-number", "filetype", "mime", "mime-types", "no_std"] readme = "README.md" homepage = "https://github.com/bojand/infer" repository = "https://github.com/bojand/infer" documentation = "https://docs.rs/infer" exclude = ["/testdata", "/tests"] [features] default = ["std"] std = ["alloc", "cfb"] alloc = [] [[example]] name = "file" path = "examples/file.rs" required-features = ["std"] [dependencies] cfb = { version = "0.7.0", optional = true } infer-0.19.0/LICENSE000064400000000000000000000020461046102023000120250ustar 00000000000000MIT License Copyright (c) 2019 
Bojan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. infer-0.19.0/README.md000064400000000000000000000145251046102023000123040ustar 00000000000000# infer ![Build Status](https://github.com/bojand/infer/workflows/build/badge.svg) [![crates version](https://img.shields.io/crates/v/infer.svg)](https://crates.io/crates/infer) [![documentation](https://docs.rs/infer/badge.svg)](https://docs.rs/infer) Small crate to infer file and MIME type by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)) signature. Adaptation of [filetype](https://github.com/h2non/filetype) Go package ported to Rust. Does not require magic file database (i.e. `/etc/magic`). ## Features - Supports a [wide range](#supported-types) of file types - Provides file extension and MIME type - File discovery by extension or MIME type - File discovery by class (image, video, audio...) 
- Supports custom new types and matchers ## Installation This crate works with Cargo and is on [crates.io](https://crates.io/crates/infer). Add it to your `Cargo.toml` like so: ```toml [dependencies] infer = "0.3" ``` If you are not using the custom matcher or the file type from file path functionality you can make this crate even lighter by importing it with no default features, like so: ```toml [dependencies] infer = { version = "0.3", default-features = false } ``` ## no_std and no_alloc support This crate supports `no_std` and `no_alloc` environments. `std` support is enabled by default, but you can disable it by importing the crate with no default features, making it depend only on the Rust `core` Library. `alloc` has to be enabled to be able to use custom file matchers. `std` has to be enabled to be able to get the file type from a file given the file path. ## Examples Most operations can be done via _top level functions_, but they are also available through the `Infer` struct, which must be used when dealing custom matchers. 
### Get the type of a buffer ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; let kind = infer::get(&buf).expect("file type is known"); assert_eq!(kind.mime_type(), "image/jpeg"); assert_eq!(kind.extension(), "jpg"); ``` ### Check file type by path ```rust let kind = infer::get_from_path("testdata/sample.jpg") .expect("file read successfully") .expect("file type is known"); assert_eq!(kind.mime_type(), "image/jpeg"); assert_eq!(kind.extension(), "jpg"); ``` ### Check for specific type ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; assert!(infer::image::is_jpeg(&buf)); ``` ### Check for specific type class ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; assert!(infer::is_image(&buf)); ``` ### Adds a custom file type matcher ```rust fn custom_matcher(buf: &[u8]) -> bool { return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; } let mut info = infer::Infer::new(); info.add("custom/foo", "foo", custom_matcher); let buf = [0x10, 0x11, 0x12, 0x13]; let kind = info.get(&buf).expect("file type is known"); assert_eq!(kind.mime_type(), "custom/foo"); assert_eq!(kind.extension(), "foo"); ``` ## Supported types #### Image - **jpg** - `image/jpeg` - **png** - `image/png` - **gif** - `image/gif` - **webp** - `image/webp` - **cr2** - `image/x-canon-cr2` - **tif** - `image/tiff` - **bmp** - `image/bmp` - **heif** - `image/heif` - **avif** - `image/avif` - **jxr** - `image/vnd.ms-photo` - **psd** - `image/vnd.adobe.photoshop` - **ico** - `image/vnd.microsoft.icon` - **ora** - `image/openraster` - **djvu** - `image/vnd.djvu` #### Video - **mp4** - `video/mp4` - **m4v** - `video/x-m4v` - **mkv** - `video/x-matroska` - **webm** - `video/webm` - **mov** - `video/quicktime` - **avi** - `video/x-msvideo` - **wmv** - `video/x-ms-wmv` - **mpg** - `video/mpeg` - **flv** - `video/x-flv` #### Audio - **mid** - `audio/midi` - **mp3** - `audio/mpeg` - **m4a** - `audio/m4a` - **ogg** - `audio/ogg` - **flac** - `audio/x-flac` - **wav** - `audio/x-wav` - **amr** - `audio/amr` - **aac** - 
`audio/aac` - **aiff** - `audio/x-aiff` - **dsf** - `audio/x-dsf` - **ape** - `audio/x-ape` #### Archive - **epub** - `application/epub+zip` - **zip** - `application/zip` - **tar** - `application/x-tar` - **rar** - `application/vnd.rar` - **gz** - `application/gzip` - **bz2** - `application/x-bzip2` - **bz3** - `application/vnd.bzip3` - **7z** - `application/x-7z-compressed` - **xz** - `application/x-xz` - **pdf** - `application/pdf` - **swf** - `application/x-shockwave-flash` - **rtf** - `application/rtf` - **eot** - `application/octet-stream` - **ps** - `application/postscript` - **sqlite** - `application/vnd.sqlite3` - **nes** - `application/x-nintendo-nes-rom` - **crx** - `application/x-google-chrome-extension` - **cab** - `application/vnd.ms-cab-compressed` - **deb** - `application/vnd.debian.binary-package` - **ar** - `application/x-unix-archive` - **Z** - `application/x-compress` - **lz** - `application/x-lzip` - **rpm** - `application/x-rpm` - **dcm** - `application/dicom` - **zst** - `application/zstd` - **lz4** - `application/x-lz4` - **msi** - `application/x-ole-storage` - **cpio** - `application/x-cpio` - **par2** - `application/x-par2` #### Book - **epub** - `application/epub+zip` - **mobi** - `application/x-mobipocket-ebook` #### Documents - **doc** - `application/msword` - **docx** - `application/vnd.openxmlformats-officedocument.wordprocessingml.document` - **xls** - `application/vnd.ms-excel` - **xlsx** - `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` - **ppt** - `application/vnd.ms-powerpoint` - **pptx** - `application/vnd.openxmlformats-officedocument.presentationml.presentation` - **odt** - `application/vnd.oasis.opendocument.text` - **ods** - `application/vnd.oasis.opendocument.spreadsheet` - **odp** - `application/vnd.oasis.opendocument.presentation` #### Font - **woff** - `application/font-woff` - **woff2** - `application/font-woff` - **ttf** - `application/font-sfnt` - **otf** - `application/font-sfnt` #### Application - 
**wasm** - `application/wasm` - **exe** - `application/vnd.microsoft.portable-executable` - **dll** - `application/vnd.microsoft.portable-executable` - **elf** - `application/x-executable` - **bc** - `application/llvm` - **mach** - `application/x-mach-binary` - **class** - `application/java` - **dex** - `application/vnd.android.dex` - **dey** - `application/vnd.android.dey` - **der** - `application/x-x509-ca-cert` - **obj** - `application/x-executable` ## Known Issues - `exe` and `dll` have the same magic number so it's not possible to tell which one just based on the binary data. `exe` is returned for all. ## License MIT infer-0.19.0/examples/file.rs000064400000000000000000000016251046102023000141250ustar 00000000000000use std::env::args; use std::process::exit; fn main() { let mut args = args(); let path = match args.nth(1) { Some(path) => path, None => { eprintln!("Please specify the file path"); exit(1); } }; match infer::get_from_path(path) { Ok(Some(info)) => { println!("Through the arcane magic of this crate we determined the file type to be"); println!("mime type: {}", info.mime_type()); println!("extension: {}", info.extension()); } Ok(None) => { eprintln!("Unknown file type 😞"); eprintln!("If you think infer should be able to recognize this file type open an issue on GitHub!"); exit(1); } Err(e) => { eprintln!("Looks like something went wrong 😔"); eprintln!("{}", e); exit(1); } } } infer-0.19.0/src/lib.rs000064400000000000000000000407551046102023000127340ustar 00000000000000/*! Small crate to infer file and MIME type by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)) signature. 
# Examples ### Get the type of a buffer ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; let kind = infer::get(&buf).expect("file type is known"); assert_eq!(kind.mime_type(), "image/jpeg"); assert_eq!(kind.extension(), "jpg"); assert_eq!(kind.matcher_type(), infer::MatcherType::Image); ``` ### Check file type by path ```rust # #[cfg(feature = "std")] # fn run() { let kind = infer::get_from_path("testdata/sample.jpg") .expect("file read successfully") .expect("file type is known"); assert_eq!(kind.mime_type(), "image/jpeg"); assert_eq!(kind.extension(), "jpg"); # } ``` ### Check for specific type ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; assert!(infer::image::is_jpeg(&buf)); ``` ### Check for specific type class ```rust let buf = [0xFF, 0xD8, 0xFF, 0xAA]; assert!(infer::is_image(&buf)); ``` ### Adds a custom file type matcher Here we actually need to use the `Infer` struct to be able to declare custom matchers. ```rust # #[cfg(feature = "alloc")] # fn run() { fn custom_matcher(buf: &[u8]) -> bool { return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; } let mut info = infer::Infer::new(); info.add("custom/foo", "foo", custom_matcher); let buf = [0x10, 0x11, 0x12, 0x13]; let kind = info.get(&buf).unwrap(); assert_eq!(kind.mime_type(), "custom/foo"); assert_eq!(kind.extension(), "foo"); # } ``` */ #![crate_name = "infer"] #![doc(html_root_url = "https://docs.rs/infer/latest")] #![forbid(unsafe_code)] #![cfg_attr(not(feature = "std"), no_std)] #[cfg(feature = "alloc")] extern crate alloc; mod map; mod matchers; #[cfg(feature = "alloc")] use alloc::vec::Vec; use core::fmt; #[cfg(feature = "std")] use std::fs::File; #[cfg(feature = "std")] use std::io::{self, Read}; #[cfg(feature = "std")] use std::path::Path; pub use map::MatcherType; use map::{WrapMatcher, MATCHER_MAP}; /// All the supported matchers categorized and exposed as functions pub use matchers::*; /// Matcher function pub type Matcher = fn(buf: &[u8]) -> bool; /// Generic information for 
a type #[derive(Copy, Clone)] pub struct Type { matcher_type: MatcherType, mime_type: &'static str, extension: &'static str, matcher: WrapMatcher, } impl Type { pub(crate) const fn new_static( matcher_type: MatcherType, mime_type: &'static str, extension: &'static str, matcher: WrapMatcher, ) -> Self { Self { matcher_type, mime_type, extension, matcher, } } /// Returns a new `Type` with matcher and extension. pub fn new( matcher_type: MatcherType, mime_type: &'static str, extension: &'static str, matcher: Matcher, ) -> Self { Self::new_static(matcher_type, mime_type, extension, WrapMatcher(matcher)) } /// Returns the type of matcher /// /// # Examples /// /// ```rust /// let info = infer::Infer::new(); /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; /// let kind = info.get(&buf).expect("file type is known"); /// /// assert_eq!(kind.matcher_type(), infer::MatcherType::Image); /// ``` pub const fn matcher_type(&self) -> MatcherType { self.matcher_type } /// Returns the mime type pub const fn mime_type(&self) -> &'static str { self.mime_type } /// Returns the file extension pub const fn extension(&self) -> &'static str { self.extension } /// Checks if buf matches this Type fn matches(&self, buf: &[u8]) -> bool { (self.matcher.0)(buf) } } impl fmt::Debug for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Type") .field("matcher_type", &self.matcher_type) .field("mime_type", &self.mime_type) .field("extension", &self.extension) .finish() } } impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(self.mime_type, f) } } impl PartialEq for Type { fn eq(&self, other: &Self) -> bool { self.matcher_type == other.matcher_type && self.mime_type == other.mime_type && self.extension == other.extension } } /// Infer allows to use a custom set of `Matcher`s for infering a MIME type. 
/// /// Most operations can be done by using the _top level functions_, but when custom matchers /// are needed every call has to go through the `Infer` struct to be able /// to see the custom matchers. pub struct Infer { #[cfg(feature = "alloc")] mmap: Vec, } impl Infer { /// Initialize a new instance of the infer struct. pub const fn new() -> Infer { #[cfg(feature = "alloc")] return Infer { mmap: Vec::new() }; #[cfg(not(feature = "alloc"))] return Infer {}; } fn iter_matchers(&self) -> impl Iterator { let mmap = MATCHER_MAP.iter(); #[cfg(feature = "alloc")] return self.mmap.iter().chain(mmap); #[cfg(not(feature = "alloc"))] return mmap; } /// Returns the file type of the buffer. /// /// # Examples /// /// ```rust /// let info = infer::Infer::new(); /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; /// let kind = info.get(&buf).expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "image/jpeg"); /// assert_eq!(kind.extension(), "jpg"); /// ``` pub fn get(&self, buf: &[u8]) -> Option { self.iter_matchers().find(|kind| kind.matches(buf)).copied() } /// Returns the file type of the file given a path. /// /// # Examples /// /// See [`get_from_path`](./fn.get_from_path.html). #[cfg(feature = "std")] pub fn get_from_path>(&self, path: P) -> io::Result> { let file = File::open(path)?; let limit = file .metadata() .map(|m| std::cmp::min(m.len(), 8192) as usize + 1) .unwrap_or(0); let mut bytes = Vec::with_capacity(limit); file.take(8192).read_to_end(&mut bytes)?; Ok(self.get(&bytes)) } /// Determines whether a buffer is of given extension. /// /// # Examples /// /// See [`is`](./fn.is.html). pub fn is(&self, buf: &[u8], extension: &str) -> bool { self.iter_matchers() .any(|kind| kind.extension() == extension && kind.matches(buf)) } /// Determines whether a buffer is of given mime type. /// /// # Examples /// /// See [`is_mime`](./fn.is_mime.html). 
pub fn is_mime(&self, buf: &[u8], mime_type: &str) -> bool { self.iter_matchers() .any(|kind| kind.mime_type() == mime_type && kind.matches(buf)) } /// Returns whether an extension is supported. /// /// # Examples /// /// See [`is_supported`](./fn.is_supported.html). pub fn is_supported(&self, extension: &str) -> bool { self.iter_matchers() .any(|kind| kind.extension() == extension) } /// Returns whether a mime type is supported. /// /// # Examples /// /// See [`is_mime_supported`](./fn.is_mime_supported.html). pub fn is_mime_supported(&self, mime_type: &str) -> bool { self.iter_matchers() .any(|kind| kind.mime_type() == mime_type) } /// Determines whether a buffer is an application type. /// /// # Examples /// /// See [`is_app`](./fn.is_app.html). pub fn is_app(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::App) } /// Determines whether a buffer is an archive type. /// /// # Examples /// /// See [`is_archive`](./fn.is_archive.html). pub fn is_archive(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Archive) } /// Determines whether a buffer is an audio type. /// /// # Examples /// /// See [`is_audio`](./fn.is_audio.html). pub fn is_audio(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Audio) } /// Determines whether a buffer is a book type. /// /// # Examples /// /// See [`is_book`](./fn.is_book.html). pub fn is_book(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Book) } /// Determines whether a buffer is a document type. /// /// # Examples /// /// See [`is_document`](./fn.is_document.html). pub fn is_document(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Doc) } /// Determines whether a buffer is a font type. /// /// # Examples /// /// See [`is_font`](./fn.is_font.html). pub fn is_font(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Font) } /// Determines whether a buffer is an image type. /// /// # Examples /// /// See [`is_image`](./fn.is_image.html). 
pub fn is_image(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Image) } /// Determines whether a buffer is a video type. /// /// # Examples /// /// See [`is_video`](./fn.is_video.html). pub fn is_video(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Video) } /// Determines whether a buffer is one of the custom types added. /// /// # Examples /// /// ```rust /// # #[cfg(feature = "alloc")] /// # fn run() { /// fn custom_matcher(buf: &[u8]) -> bool { /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; /// } /// /// let mut info = infer::Infer::new(); /// info.add("custom/foo", "foo", custom_matcher); /// let buf = [0x10, 0x11, 0x12, 0x13]; /// assert!(info.is_custom(&buf)); /// # } /// ``` pub fn is_custom(&self, buf: &[u8]) -> bool { self.is_type(buf, MatcherType::Custom) } /// Adds a custom matcher. /// /// Custom matchers are matched in order of addition and before /// the default set of matchers. /// /// # Examples /// /// ```rust /// fn custom_matcher(buf: &[u8]) -> bool { /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; /// } /// /// let mut info = infer::Infer::new(); /// info.add("custom/foo", "foo", custom_matcher); /// let buf = [0x10, 0x11, 0x12, 0x13]; /// let kind = info.get(&buf).expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "custom/foo"); /// assert_eq!(kind.extension(), "foo"); /// ``` #[cfg(feature = "alloc")] pub fn add(&mut self, mime_type: &'static str, extension: &'static str, m: Matcher) { self.mmap.push(Type::new_static( MatcherType::Custom, mime_type, extension, WrapMatcher(m), )); } fn is_type(&self, buf: &[u8], matcher_type: MatcherType) -> bool { self.iter_matchers() .any(|kind| kind.matcher_type() == matcher_type && kind.matches(buf)) } } impl Default for Infer { fn default() -> Self { Infer::new() } } static INFER: Infer = Infer::new(); /// Returns the file type of the buffer. 
/// /// # Examples /// /// ```rust /// let info = infer::Infer::new(); /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; /// let kind = info.get(&buf).expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "image/jpeg"); /// assert_eq!(kind.extension(), "jpg"); /// ``` pub fn get(buf: &[u8]) -> Option { INFER.get(buf) } /// Returns the file type of the file given a path. /// /// # Errors /// /// Returns an error if we fail to read the path. /// /// # Examples /// /// ```rust /// let kind = infer::get_from_path("testdata/sample.jpg") /// .expect("file read successfully") /// .expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "image/jpeg"); /// assert_eq!(kind.extension(), "jpg"); /// ``` #[cfg(feature = "std")] pub fn get_from_path>(path: P) -> io::Result> { INFER.get_from_path(path) } /// Determines whether a buffer is of given extension. /// /// # Examples /// /// ```rust /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; /// assert!(infer::is(&buf, "jpg")); /// ``` pub fn is(buf: &[u8], extension: &str) -> bool { INFER.is(buf, extension) } /// Determines whether a buffer is of given mime type. /// /// # Examples /// /// ```rust /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; /// assert!(infer::is_mime(&buf, "image/jpeg")); /// ``` pub fn is_mime(buf: &[u8], mime_type: &str) -> bool { INFER.is_mime(buf, mime_type) } /// Returns whether an extension is supported. /// /// # Examples /// /// ```rust /// assert!(infer::is_supported("jpg")); /// ``` pub fn is_supported(extension: &str) -> bool { INFER.is_supported(extension) } /// Returns whether a mime type is supported. /// /// # Examples /// /// ```rust /// assert!(infer::is_mime_supported("image/jpeg")); /// ``` pub fn is_mime_supported(mime_type: &str) -> bool { INFER.is_mime_supported(mime_type) } /// Determines whether a buffer is an application type. 
/// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_app(&fs::read("testdata/sample.wasm").unwrap())); /// ``` pub fn is_app(buf: &[u8]) -> bool { INFER.is_app(buf) } /// Determines whether a buffer is an archive type. /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_archive(&fs::read("testdata/sample.pdf").unwrap())); /// ``` pub fn is_archive(buf: &[u8]) -> bool { INFER.is_archive(buf) } /// Determines whether a buffer is an audio type. /// /// # Examples /// /// ```rust /// // mp3 /// let v = [0xff, 0xfb, 0x90, 0x44, 0x00]; /// assert!(infer::is_audio(&v)); /// ``` pub fn is_audio(buf: &[u8]) -> bool { INFER.is_audio(buf) } /// Determines whether a buffer is a book type. /// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_book(&fs::read("testdata/sample.epub").unwrap())); /// ``` pub fn is_book(buf: &[u8]) -> bool { INFER.is_book(buf) } /// Determines whether a buffer is a document type. /// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_document(&fs::read("testdata/sample.docx").unwrap())); /// ``` pub fn is_document(buf: &[u8]) -> bool { INFER.is_document(buf) } /// Determines whether a buffer is a font type. /// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_font(&fs::read("testdata/sample.ttf").unwrap())); /// ``` pub fn is_font(buf: &[u8]) -> bool { INFER.is_font(buf) } /// Determines whether a buffer is an image type. /// /// # Examples /// /// ```rust /// let v = [0xFF, 0xD8, 0xFF, 0xAA]; /// assert!(infer::is_image(&v)); /// ``` pub fn is_image(buf: &[u8]) -> bool { INFER.is_image(buf) } /// Determines whether a buffer is a video type. 
/// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::is_video(&fs::read("testdata/sample.mov").unwrap())); /// ``` pub fn is_video(buf: &[u8]) -> bool { INFER.is_video(buf) } #[cfg(test)] mod tests { #[cfg(feature = "alloc")] use super::Infer; #[test] fn test_get_unknown() { let buf = []; assert!(crate::get(&buf).is_none()); } #[test] fn test_get_jpeg() { let buf = [0xFF, 0xD8, 0xFF, 0xAA]; let kind = crate::get(&buf).expect("file type is known"); assert_eq!(kind.extension(), "jpg"); assert_eq!(kind.mime_type(), "image/jpeg"); } #[test] fn test_matcher_type() { let buf = [0xFF, 0xD8, 0xFF, 0xAA]; let kind = crate::get(&buf).expect("file type is known"); assert_eq!(kind.matcher_type(), crate::MatcherType::Image); } #[cfg(feature = "alloc")] #[test] fn test_custom_matcher_ordering() { // overrides jpeg matcher fn foo_matcher(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0xFF && buf[1] == 0xD8 && buf[2] == 0xFF } // overrides png matcher fn bar_matcher(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x89 && buf[1] == 0x50 && buf[2] == 0x4E && buf[3] == 0x47 } let mut info = Infer::new(); info.add("custom/foo", "foo", foo_matcher); info.add("custom/bar", "bar", bar_matcher); let buf_foo = &[0xFF, 0xD8, 0xFF]; let typ = info.get(buf_foo).expect("type is matched"); assert_eq!(typ.mime_type(), "custom/foo"); assert_eq!(typ.extension(), "foo"); let buf_bar = &[0x89, 0x50, 0x4E, 0x47]; let typ = info.get(buf_bar).expect("type is matched"); assert_eq!(typ.mime_type(), "custom/bar"); assert_eq!(typ.extension(), "bar"); } } infer-0.19.0/src/map.rs000064400000000000000000000306651046102023000127420ustar 00000000000000use super::{matchers, Matcher, Type}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum MatcherType { App, Archive, Audio, Book, Doc, Font, Image, Text, Video, Custom, } // This is needed until function pointers can be used in `const fn`. 
// See trick and discussion at https://github.com/rust-lang/rust/issues/63997#issuecomment-616666309 #[repr(transparent)] #[derive(Copy, Clone)] pub struct WrapMatcher(pub Matcher); macro_rules! matcher_map { ($(($mtype:expr, $mime_type:literal, $extension:literal, $matcher:expr)),*) => { pub const MATCHER_MAP: &[Type] = &[ $(Type::new_static($mtype, $mime_type, $extension, WrapMatcher($matcher)),)* ]; }; } // Order: Application, Image, Video, Audio, Font, Document, Archive, Text. // The above order should be preserved when adding new types since // it may affect match result and/or performances. matcher_map!( // Application ( MatcherType::App, "application/wasm", "wasm", matchers::app::is_wasm ), ( MatcherType::App, "application/x-executable", "elf", matchers::app::is_elf ), ( MatcherType::App, "application/vnd.microsoft.portable-executable", "exe", matchers::app::is_exe ), ( MatcherType::App, "application/vnd.microsoft.portable-executable", "dll", matchers::app::is_dll ), ( MatcherType::App, "application/java", "class", matchers::app::is_java ), ( MatcherType::App, "application/x-llvm", "bc", matchers::app::is_llvm ), ( MatcherType::App, "application/x-mach-binary", "mach", matchers::app::is_mach ), ( MatcherType::App, "application/vnd.android.dex", "dex", matchers::app::is_dex ), ( MatcherType::App, "application/vnd.android.dey", "dey", matchers::app::is_dey ), ( MatcherType::App, "application/x-x509-ca-cert", "der", matchers::app::is_der ), ( MatcherType::App, "application/x-executable", "obj", matchers::app::is_coff ), ( MatcherType::App, "application/x-x509-ca-cert", "pem", matchers::app::is_pem ), // Book ( MatcherType::Book, "application/epub+zip", "epub", matchers::book::is_epub ), ( MatcherType::Book, "application/x-mobipocket-ebook", "mobi", matchers::book::is_mobi ), // Image ( MatcherType::Image, "image/jpeg", "jpg", matchers::image::is_jpeg ), ( MatcherType::Image, "image/jp2", "jp2", matchers::image::is_jpeg2000 ), ( MatcherType::Image, "image/png", 
"png", matchers::image::is_png ), ( MatcherType::Image, "image/gif", "gif", matchers::image::is_gif ), ( MatcherType::Image, "image/webp", "webp", matchers::image::is_webp ), ( MatcherType::Image, "image/x-canon-cr2", "cr2", matchers::image::is_cr2 ), ( MatcherType::Image, "image/tiff", "tif", matchers::image::is_tiff ), ( MatcherType::Image, "image/bmp", "bmp", matchers::image::is_bmp ), ( MatcherType::Image, "image/vnd.ms-photo", "jxr", matchers::image::is_jxr ), ( MatcherType::Image, "image/vnd.adobe.photoshop", "psd", matchers::image::is_psd ), ( MatcherType::Image, "image/vnd.microsoft.icon", "ico", matchers::image::is_ico ), ( MatcherType::Image, "image/heif", "heif", matchers::image::is_heif ), ( MatcherType::Image, "image/avif", "avif", matchers::image::is_avif ), ( MatcherType::Image, "image/jxl", "jxl", matchers::image::is_jxl ), ( MatcherType::Image, "image/openraster", "ora", matchers::image::is_ora ), ( MatcherType::Image, "image/vnd.djvu", "djvu", matchers::image::is_djvu ), // Video ( MatcherType::Video, "video/mp4", "mp4", matchers::video::is_mp4 ), ( MatcherType::Video, "video/x-m4v", "m4v", matchers::video::is_m4v ), ( MatcherType::Video, "video/x-matroska", "mkv", matchers::video::is_mkv ), ( MatcherType::Video, "video/webm", "webm", matchers::video::is_webm ), ( MatcherType::Video, "video/quicktime", "mov", matchers::video::is_mov ), ( MatcherType::Video, "video/x-msvideo", "avi", matchers::video::is_avi ), ( MatcherType::Video, "video/x-ms-wmv", "wmv", matchers::video::is_wmv ), ( MatcherType::Video, "video/mpeg", "mpg", matchers::video::is_mpeg ), ( MatcherType::Video, "video/x-flv", "flv", matchers::video::is_flv ), // Audio ( MatcherType::Audio, "audio/midi", "midi", matchers::audio::is_midi ), ( MatcherType::Audio, "audio/mpeg", "mp3", matchers::audio::is_mp3 ), ( MatcherType::Audio, "audio/m4a", "m4a", matchers::audio::is_m4a ), // has to come before ogg ( MatcherType::Audio, "audio/opus", "opus", matchers::audio::is_ogg_opus ), ( 
MatcherType::Audio, "audio/ogg", "ogg", matchers::audio::is_ogg ), ( MatcherType::Audio, "audio/x-flac", "flac", matchers::audio::is_flac ), ( MatcherType::Audio, "audio/x-wav", "wav", matchers::audio::is_wav ), ( MatcherType::Audio, "audio/amr", "amr", matchers::audio::is_amr ), ( MatcherType::Audio, "audio/aac", "aac", matchers::audio::is_aac ), ( MatcherType::Audio, "audio/x-aiff", "aiff", matchers::audio::is_aiff ), ( MatcherType::Audio, "audio/x-dsf", "dsf", matchers::audio::is_dsf ), ( MatcherType::Audio, "audio/x-ape", "ape", matchers::audio::is_ape ), // Font ( MatcherType::Font, "application/font-woff", "woff", matchers::font::is_woff ), ( MatcherType::Font, "application/font-woff", "woff2", matchers::font::is_woff2 ), ( MatcherType::Font, "application/font-sfnt", "ttf", matchers::font::is_ttf ), ( MatcherType::Font, "application/font-sfnt", "otf", matchers::font::is_otf ), // Document ( MatcherType::Doc, "application/msword", "doc", matchers::doc::is_doc ), ( MatcherType::Doc, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", matchers::doc::is_docx ), ( MatcherType::Doc, "application/vnd.ms-excel", "xls", matchers::doc::is_xls ), ( MatcherType::Doc, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", matchers::doc::is_xlsx ), ( MatcherType::Doc, "application/vnd.ms-powerpoint", "ppt", matchers::doc::is_ppt ), ( MatcherType::Doc, "application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx", matchers::doc::is_pptx ), // OpenDocument ( MatcherType::Doc, "application/vnd.oasis.opendocument.text", "odt", matchers::odf::is_odt ), ( MatcherType::Doc, "application/vnd.oasis.opendocument.spreadsheet", "ods", matchers::odf::is_ods ), ( MatcherType::Doc, "application/vnd.oasis.opendocument.presentation", "odp", matchers::odf::is_odp ), // Archive ( MatcherType::Archive, "application/epub+zip", "epub", matchers::archive::is_epub ), ( MatcherType::Archive, "application/zip", "zip", 
matchers::archive::is_zip ), ( MatcherType::Archive, "application/x-tar", "tar", matchers::archive::is_tar ), ( MatcherType::Archive, "application/x-par2", "par2", matchers::archive::is_par2 ), ( MatcherType::Archive, "application/vnd.rar", "rar", matchers::archive::is_rar ), ( MatcherType::Archive, "application/gzip", "gz", matchers::archive::is_gz ), ( MatcherType::Archive, "application/x-bzip2", "bz2", matchers::archive::is_bz2 ), ( MatcherType::Archive, "application/vnd.bzip3", "bz3", matchers::archive::is_bz3 ), ( MatcherType::Archive, "application/x-7z-compressed", "7z", matchers::archive::is_7z ), ( MatcherType::Archive, "application/x-xz", "xz", matchers::archive::is_xz ), ( MatcherType::Archive, "application/pdf", "pdf", matchers::archive::is_pdf ), ( MatcherType::Archive, "application/x-shockwave-flash", "swf", matchers::archive::is_swf ), ( MatcherType::Archive, "application/rtf", "rtf", matchers::archive::is_rtf ), ( MatcherType::Archive, "application/octet-stream", "eot", matchers::archive::is_eot ), ( MatcherType::Archive, "application/postscript", "ps", matchers::archive::is_ps ), ( MatcherType::Archive, "application/vnd.sqlite3", "sqlite", matchers::archive::is_sqlite ), ( MatcherType::Archive, "application/x-nintendo-nes-rom", "nes", matchers::archive::is_nes ), ( MatcherType::Archive, "application/x-google-chrome-extension", "crx", matchers::archive::is_crx ), ( MatcherType::Archive, "application/vnd.ms-cab-compressed", "cab", matchers::archive::is_cab ), ( MatcherType::Archive, "application/vnd.debian.binary-package", "deb", matchers::archive::is_deb ), ( MatcherType::Archive, "application/x-unix-archive", "ar", matchers::archive::is_ar ), ( MatcherType::Archive, "application/x-compress", "Z", matchers::archive::is_z ), ( MatcherType::Archive, "application/x-lzip", "lz", matchers::archive::is_lz ), ( MatcherType::Archive, "application/x-rpm", "rpm", matchers::archive::is_rpm ), ( MatcherType::Archive, "application/dicom", "dcm", 
matchers::archive::is_dcm ), ( MatcherType::Archive, "application/zstd", "zst", matchers::archive::is_zst ), ( MatcherType::Archive, "application/x-lz4", "lz4", matchers::archive::is_lz4 ), ( MatcherType::Archive, "application/x-ole-storage", "msi", matchers::archive::is_msi ), ( MatcherType::Archive, "application/x-cpio", "cpio", matchers::archive::is_cpio ), // Text ( MatcherType::Text, "text/html", "html", matchers::text::is_html ), (MatcherType::Text, "text/xml", "xml", matchers::text::is_xml), ( MatcherType::Text, "text/x-shellscript", "sh", matchers::text::is_shellscript ) ); infer-0.19.0/src/matchers/app.rs000064400000000000000000000127551046102023000145530ustar 00000000000000/// Returns whether a buffer is a wasm. /// /// # Examples /// /// ```rust /// use std::fs; /// assert!(infer::app::is_wasm(&fs::read("testdata/sample.wasm").unwrap())); /// ``` pub fn is_wasm(buf: &[u8]) -> bool { // WASM has starts with `\0asm`, followed by the version. // http://webassembly.github.io/spec/core/binary/modules.html#binary-magic buf.len() >= 8 && buf[0] == 0x00 && buf[1] == 0x61 && buf[2] == 0x73 && buf[3] == 0x6D && buf[4] == 0x01 && buf[5] == 0x00 && buf[6] == 0x00 && buf[7] == 0x00 } /// Returns whether a buffer is an EXE. DLL and EXE have the same magic number, so returns true also for a DLL. /// /// # Example /// /// ```rust /// use std::fs; /// assert!(infer::app::is_exe(&fs::read("testdata/sample.exe").unwrap())); /// ``` pub fn is_exe(buf: &[u8]) -> bool { buf.len() > 1 && buf[0] == 0x4D && buf[1] == 0x5A } /// Returns whether a buffer is a DLL. DLL and EXE have the same magic number, so returns true also for an EXE. pub fn is_dll(buf: &[u8]) -> bool { is_exe(buf) } /// Returns whether a buffer is an ELF. pub fn is_elf(buf: &[u8]) -> bool { buf.len() > 52 && buf[0] == 0x7F && buf[1] == 0x45 && buf[2] == 0x4C && buf[3] == 0x46 } /// Returns whether a buffer is compiled Java bytecode. 
pub fn is_java(buf: &[u8]) -> bool { if buf.len() < 8 || [0xca, 0xfe, 0xba, 0xbe] != buf[0..4] { return false; } //Checking the next 4 bytes are greater than or equal to 45 to distinguish from Mach-O binaries //Mach-O "Fat" binaries also use 0xCAFEBABE as magic bytes to start the file //Java are always Big Endian, after the magic bytes there are 2 bytes for the class file's //minor version and then 2 bytes for the major version //https://docs.oracle.com/javase/specs/jvms/se20/html/jvms-4.html let minor_major_bytes = [buf[4], buf[5], buf[6], buf[7]]; if u32::from_be_bytes(minor_major_bytes) < 45 { //Java class files start at a major version of 45 and a minor of 0 //So a value less than this shouldn't be a Java class file return false; } //For due dillegence confirm that the major bytes are greater than or equal to 45 u16::from_be_bytes([buf[6], buf[7]]) >= 45 } /// Returns whether a buffer is LLVM Bitcode. pub fn is_llvm(buf: &[u8]) -> bool { buf.len() >= 2 && buf[0] == 0x42 && buf[1] == 0x43 } /// Returns whether a buffer is a Mach-O binary. pub fn is_mach(buf: &[u8]) -> bool { // Mach-O binaries can be one of four variants: x86, x64, PowerPC, "Fat" (x86 + PowerPC) // https://ilostmynotes.blogspot.com/2014/05/mach-o-filetype-identification.html if buf.len() < 4 { return false; } match buf[0..4] { [width, 0xfa, 0xed, 0xfe] if width == 0xcf || width == 0xce => true, [0xfe, 0xed, 0xfa, width] if width == 0xcf || width == 0xce => true, [0xca, 0xfe, 0xba, 0xbe] if buf.len() >= 8 => { //Checking the next 4 bytes are less than 45 to distinguish from Java class files //which also use 0xCAFEBABE as magic bytes //Fat Mach-O binaries are always Big Endian u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]) < 45 } _ => false, } } /// Returns whether a buffer is a Dalvik Executable (DEX). 
pub fn is_dex(buf: &[u8]) -> bool { // https://source.android.com/devices/tech/dalvik/dex-format#dex-file-magic buf.len() > 36 // magic && buf[0] == 0x64 && buf[1] == 0x65 && buf[2] == 0x78 && buf[3] == 0x0A // file sise && buf[36] == 0x70 } /// Returns whether a buffer is a Dey Optimized Dalvik Executable (ODEX). pub fn is_dey(buf: &[u8]) -> bool { buf.len() > 100 // magic && buf[0] == 0x64 && buf[1] == 0x65 && buf[2] == 0x79 && buf[3] == 0x0A // file sise && is_dex(&buf[40..100]) } /// Returns whether a buffer DER encoded X.509 certificate. pub fn is_der(buf: &[u8]) -> bool { // https://en.wikipedia.org/wiki/List_of_file_signatures // https://github.com/ReFirmLabs/binwalk/blob/master/src/binwalk/magic/crypto#L25-L37 // https://www.digitalocean.com/community/tutorials/openssl-essentials-working-with-ssl-certificates-private-keys-and-csrs // openssl req -newkey rsa:2048 -nodes -keyout domain.key -x509 -days 1 -out domain.crt // openssl x509 -in domain.crt -outform der -out domain.der buf.len() > 2 && buf[0] == 0x30 && buf[1] == 0x82 } /// Returns whether a buffer is a Common Object File Format for i386 architecture. pub fn is_coff_i386(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x4C && buf[1] == 0x01 } /// Returns whether a buffer is a Common Object File Format for x64 architecture. pub fn is_coff_x64(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x64 && buf[1] == 0x86 } /// Returns whether a buffer is a Common Object File Format for Itanium architecture. pub fn is_coff_ia64(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x00 && buf[1] == 0x02 } /// Returns whether a buffer is a Common Object File Format. 
pub fn is_coff(buf: &[u8]) -> bool { is_coff_x64(buf) || is_coff_i386(buf) || is_coff_ia64(buf) } /// Returns whether a buffer is pem pub fn is_pem(buf: &[u8]) -> bool { // https://en.wikipedia.org/wiki/List_of_file_signatures buf.len() > 11 && buf[0] == b'-' && buf[1] == b'-' && buf[2] == b'-' && buf[3] == b'-' && buf[4] == b'-' && buf[5] == b'B' && buf[6] == b'E' && buf[7] == b'G' && buf[8] == b'I' && buf[9] == b'N' && buf[10] == b' ' } infer-0.19.0/src/matchers/archive.rs000064400000000000000000000220111046102023000153760ustar 00000000000000use core::convert::{TryFrom, TryInto}; /// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool { crate::book::is_epub(buf) } /// Returns whether a buffer is a zip archive. pub fn is_zip(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x50 && buf[1] == 0x4B && (((buf[2] == 0x3 && buf[3] == 0x4) || (buf[2] == 0x5 && buf[3] == 0x6) || (buf[2] == 0x7 && buf[3] == 0x8)) || ( // winzip buf.len() > 7 && (buf[2] == 0x30 && buf[3] == 0x30 && buf[4] == 0x50 && buf[5] == 0x4B && buf[6] == 0x3 && buf[7] == 0x4) )) } /// Returns whether a buffer is a tar archive. pub fn is_tar(buf: &[u8]) -> bool { buf.len() > 261 && buf[257] == 0x75 && buf[258] == 0x73 && buf[259] == 0x74 && buf[260] == 0x61 && buf[261] == 0x72 } pub fn is_par2(buf: &[u8]) -> bool { buf.len() > 8 && buf[0] == 0x50 && buf[1] == 0x41 && buf[2] == 0x52 && buf[3] == 0x32 && buf[4] == 0x00 && buf[5] == 0x50 && buf[6] == 0x4B && buf[7] == 0x54 } /// Returns whether a buffer is a RAR archive. pub fn is_rar(buf: &[u8]) -> bool { buf.len() > 6 && buf[0] == 0x52 && buf[1] == 0x61 && buf[2] == 0x72 && buf[3] == 0x21 && buf[4] == 0x1A && buf[5] == 0x7 && (buf[6] == 0x0 || buf[6] == 0x1) } /// Returns whether a buffer is a gzip archive. pub fn is_gz(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x1F && buf[1] == 0x8B && buf[2] == 0x8 } /// Returns whether a buffer is a bzip2 archive. 
pub fn is_bz2(buf: &[u8]) -> bool {
    // Magic: "BZh".
    buf.starts_with(b"BZh")
}

/// Returns whether a buffer is a bzip3 archive.
pub fn is_bz3(buf: &[u8]) -> bool {
    buf.starts_with(b"BZ3v1")
}

/// Returns whether a buffer is a 7z archive.
pub fn is_7z(buf: &[u8]) -> bool {
    const MAGIC: [u8; 6] = [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C];
    buf.starts_with(&MAGIC)
}

/// Returns whether a buffer is a PDF.
pub fn is_pdf(buf: &[u8]) -> bool {
    buf.starts_with(b"%PDF")
}

/// Returns whether a buffer is a SWF.
pub fn is_swf(buf: &[u8]) -> bool {
    // "CWS" or "FWS": only the first byte differs between the two accepted forms.
    matches!(buf, [0x43 | 0x46, 0x57, 0x53, ..])
}

/// Returns whether a buffer is an RTF.
pub fn is_rtf(buf: &[u8]) -> bool {
    buf.starts_with(b"{\\rtf")
}

/// Returns whether a buffer is a Nintendo NES ROM.
pub fn is_nes(buf: &[u8]) -> bool {
    buf.starts_with(b"NES\x1A")
}

/// Returns whether a buffer is Google Chrome Extension
pub fn is_crx(buf: &[u8]) -> bool {
    buf.starts_with(b"Cr24")
}

/// Returns whether a buffer is a CAB.
pub fn is_cab(buf: &[u8]) -> bool {
    // Either of two four-byte signatures is accepted: "MSCF" or "ISc(".
    buf.starts_with(b"MSCF") || buf.starts_with(b"ISc(")
}

/// Returns whether a buffer is a eot octet stream.
pub fn is_eot(buf: &[u8]) -> bool {
    if buf.len() <= 35 {
        return false;
    }
    // Bytes 34..36 must read "LP".
    let has_marker = buf[34..36] == [0x4C, 0x50];
    // Bytes 8..11 must be one of three accepted triples.
    let has_known_version = matches!(
        buf[8..11],
        [0x02, 0x00, 0x01] | [0x01, 0x00, 0x00] | [0x02, 0x00, 0x02]
    );
    has_marker && has_known_version
}

/// Returns whether a buffer is postscript.
pub fn is_ps(buf: &[u8]) -> bool {
    // Magic: "%!".
    buf.starts_with(b"%!")
}

/// Returns whether a buffer is xz archive.
pub fn is_xz(buf: &[u8]) -> bool {
    const MAGIC: [u8; 6] = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00];
    buf.starts_with(&MAGIC)
}

/// Returns whether a buffer is a sqlite3 database.
///
/// # Example
///
/// ```rust
/// use std::fs;
/// assert!(infer::archive::is_sqlite(&fs::read("testdata/sample.db").unwrap()));
/// ```
pub fn is_sqlite(buf: &[u8]) -> bool {
    // Only the first four bytes ("SQLi") are compared.
    buf.starts_with(b"SQLi")
}

/// Returns whether a buffer is a deb archive.
pub fn is_deb(buf: &[u8]) -> bool {
    // The `ar` global header followed directly by the "debian-binary" member name.
    buf.starts_with(b"!<arch>\ndebian-binary")
}

/// Returns whether a buffer is a ar archive.
pub fn is_ar(buf: &[u8]) -> bool {
    buf.starts_with(b"!<arch>")
}

/// Returns whether a buffer is a z archive.
pub fn is_z(buf: &[u8]) -> bool {
    // 0x1F followed by either 0xA0 or 0x9D.
    matches!(buf, [0x1F, 0xA0 | 0x9D, ..])
}

/// Returns whether a buffer is a lzip archive.
pub fn is_lz(buf: &[u8]) -> bool {
    buf.starts_with(b"LZIP")
}

/// Returns whether a buffer is an RPM.
pub fn is_rpm(buf: &[u8]) -> bool {
    const MAGIC: [u8; 4] = [0xED, 0xAB, 0xEE, 0xDB];
    // More than 96 bytes are required even though only the first four are compared.
    buf.len() > 96 && buf.starts_with(&MAGIC)
}

/// Returns whether a buffer is a dcm archive.
pub fn is_dcm(buf: &[u8]) -> bool {
    // The four ASCII bytes "DICM" are expected at offset 128.
    buf.len() > 131
        && buf[128] == 0x44
        && buf[129] == 0x49
        && buf[130] == 0x43
        && buf[131] == 0x4D
}

// Magic range shared by skippable frames: 0x184D2A50 ..= 0x184D2A5F.
const ZSTD_SKIP_START: usize = 0x184D2A50;
const ZSTD_SKIP_MASK: usize = 0xFFFFFFF0;

/// Returns whether a buffer is a Zstd archive.
///
/// Leading skippable frames are stepped over; the buffer matches once a
/// Zstandard frame magic is found at the start of a frame. Returns `false` if
/// the data ends, is truncated inside a skippable frame, or starts a frame
/// with any other magic.
// Zstandard compressed data is made of one or more frames.
// There are two frame formats defined by Zstandard: Zstandard frames and Skippable frames.
// See more details from https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2
pub fn is_zst(buf: &[u8]) -> bool {
    // Walk the frames iteratively: a long chain of skippable frames in
    // untrusted input must not translate into unbounded recursion depth.
    let mut rest = buf;
    loop {
        if rest.starts_with(&[0x28, 0xB5, 0x2F, 0xFD]) {
            return true;
        }

        // A skippable frame header is 4 magic bytes plus a 4-byte length.
        if rest.len() < 8 {
            return false;
        }

        let magic = u32::from_le_bytes(rest[0..4].try_into().unwrap());
        let Ok(magic) = usize::try_from(magic) else {
            return false;
        };

        if magic & ZSTD_SKIP_MASK != ZSTD_SKIP_START {
            return false;
        }

        let data_len = u32::from_le_bytes(rest[4..8].try_into().unwrap());
        let Ok(data_len) = usize::try_from(data_len) else {
            return false;
        };

        // `data_len` comes straight from the input; on targets where `usize`
        // is 32 bits wide the addition can overflow, so it is checked.
        let Some(frame_len) = data_len.checked_add(8) else {
            return false;
        };

        if rest.len() < frame_len {
            return false;
        }

        rest = &rest[frame_len..];
    }
}

/// Returns whether a buffer is a LZ4 archive.
// LZ4 compressed data is made of one or more frames.
// There are two frame formats defined by LZ4: LZ4 Frame format and Skippable frames.
// See more details from https://github.com/lz4/lz4/blob/v1.9.4/doc/lz4_Frame_format.md pub fn is_lz4(buf: &[u8]) -> bool { if buf.len() > 3 && buf[0] == 0x04 && buf[1] == 0x22 && buf[2] == 0x4D && buf[3] == 0x18 { return true; } if buf.len() < 8 { return false; } let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap()); let Ok(magic) = usize::try_from(magic) else { return false; }; if magic & ZSTD_SKIP_MASK != ZSTD_SKIP_START { return false; } let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()); let Ok(data_len) = usize::try_from(data_len) else { return false; }; if buf.len() < 8 + data_len { return false; } let next_frame = &buf[8 + data_len..]; is_lz4(next_frame) } /// Returns whether a buffer is a MSI Windows Installer archive. pub fn is_msi(buf: &[u8]) -> bool { buf.len() > 7 && buf[0] == 0xD0 && buf[1] == 0xCF && buf[2] == 0x11 && buf[3] == 0xE0 && buf[4] == 0xA1 && buf[5] == 0xB1 && buf[6] == 0x1A && buf[7] == 0xE1 } /// Returns whether a buffer is a CPIO archive. pub fn is_cpio(buf: &[u8]) -> bool { (buf.len() > 1 && ((buf[0] == 0xC7 && buf[1] == 0x71) // little endian, old format || (buf[0] == 0x71 && buf[1] == 0xC7))) // big endian, old format || (buf.len() > 6 && buf[0] == 0x30 && buf[1] == 0x37 && buf[2] == 0x30 && buf[3] == 0x37 && buf[4] == 0x30 && buf[5] == 0x31) // newc format } infer-0.19.0/src/matchers/audio.rs000064400000000000000000000060761046102023000150730ustar 00000000000000/// Returns whether a buffer is MIDI data. pub fn is_midi(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x4D && buf[1] == 0x54 && buf[2] == 0x68 && buf[3] == 0x64 } /// Returns whether a buffer is MP3 data. pub fn is_mp3(buf: &[u8]) -> bool { buf.len() > 2 && ((buf[0] == 0x49 && buf[1] == 0x44 && buf[2] == 0x33) // ID3v2 // Final bit (has crc32) may be or may not be set. || (buf[0] == 0xFF && buf[1] == 0xFB)) } /// Returns whether a buffer is M4A data. 
pub fn is_m4a(buf: &[u8]) -> bool {
    if buf.len() <= 10 {
        return false;
    }
    // Either "ftypM4A" at offset 4, or "M4A " at the very start of the buffer.
    buf[4..11] == *b"ftypM4A" || buf[..4] == *b"M4A "
}

/// Returns whether a buffer is OGG data.
pub fn is_ogg(buf: &[u8]) -> bool {
    buf.starts_with(b"OggS")
}

/// Returns whether a buffer is OGG Opus data.
pub fn is_ogg_opus(buf: &[u8]) -> bool {
    // An Ogg page whose bytes 28..36 read "OpusHead".
    is_ogg(buf) && buf.len() > 35 && buf[28..36] == *b"OpusHead"
}

/// Returns whether a buffer is FLAC data.
pub fn is_flac(buf: &[u8]) -> bool {
    buf.starts_with(b"fLaC")
}

/// Returns whether a buffer is WAV data.
pub fn is_wav(buf: &[u8]) -> bool {
    // "RIFF" at the start, "WAVE" at offset 8.
    buf.len() > 11 && buf.starts_with(b"RIFF") && buf[8..12] == *b"WAVE"
}

/// Returns whether a buffer is AMR data.
pub fn is_amr(buf: &[u8]) -> bool {
    // More than 11 bytes are required even though only the six-byte "#!AMR\n"
    // prefix is compared.
    buf.len() > 11 && buf.starts_with(b"#!AMR\n")
}

/// Returns whether a buffer is AAC data.
pub fn is_aac(buf: &[u8]) -> bool {
    // 0xFF followed by either 0xF1 or 0xF9.
    matches!(buf, [0xFF, 0xF1 | 0xF9, ..])
}

/// Returns whether a buffer is AIFF data.
pub fn is_aiff(buf: &[u8]) -> bool {
    // "FORM" at the start, "AIFF" at offset 8.
    buf.len() > 11 && buf.starts_with(b"FORM") && buf[8..12] == *b"AIFF"
}

/// Returns whether a buffer is DSF data.
pub fn is_dsf(buf: &[u8]) -> bool { // ref: https://dsd-guide.com/sites/default/files/white-papers/DSFFileFormatSpec_E.pdf buf.len() > 4 && buf[0] == b'D' && buf[1] == b'S' && buf[2] == b'D' && buf[3] == b' ' } /// Returns whether a buffer is APE (Monkey's Audio) data. pub fn is_ape(buf: &[u8]) -> bool { // ref: https://github.com/fernandotcl/monkeys-audio/blob/master/src/MACLib/APEHeader.h buf.len() > 4 && buf[0] == b'M' && buf[1] == b'A' && buf[2] == b'C' && buf[3] == b' ' } infer-0.19.0/src/matchers/book.rs000064400000000000000000000024301046102023000147120ustar 00000000000000/// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool { buf.len() > 57 && buf[0] == 0x50 && buf[1] == 0x4B && buf[2] == 0x3 && buf[3] == 0x4 && buf[30] == 0x6D && buf[31] == 0x69 && buf[32] == 0x6D && buf[33] == 0x65 && buf[34] == 0x74 && buf[35] == 0x79 && buf[36] == 0x70 && buf[37] == 0x65 && buf[38] == 0x61 && buf[39] == 0x70 && buf[40] == 0x70 && buf[41] == 0x6C && buf[42] == 0x69 && buf[43] == 0x63 && buf[44] == 0x61 && buf[45] == 0x74 && buf[46] == 0x69 && buf[47] == 0x6F && buf[48] == 0x6E && buf[49] == 0x2F && buf[50] == 0x65 && buf[51] == 0x70 && buf[52] == 0x75 && buf[53] == 0x62 && buf[54] == 0x2B && buf[55] == 0x7A && buf[56] == 0x69 && buf[57] == 0x70 } /// Returns whether a buffer is a mobi. pub fn is_mobi(buf: &[u8]) -> bool { buf.len() > 67 // BOOK && buf[60] == 0x42 && buf[61] == 0x4F && buf[62] == 0x4F && buf[63] == 0x4B // MOBI && buf[64] == 0x4D && buf[65] == 0x4F && buf[66] == 0x42 && buf[67] == 0x49 } infer-0.19.0/src/matchers/doc.rs000064400000000000000000000107451046102023000145350ustar 00000000000000use core::convert::TryInto; use super::compare_bytes; #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Eq, PartialEq)] enum DocType { DOC, DOCX, XLS, XLSX, PPT, PPTX, OOXML, } /// Returns whether a buffer is Microsoft Word Document (DOC) data. 
pub fn is_doc(buf: &[u8]) -> bool { ole2(buf) == Some(DocType::DOC) } /// Returns whether a buffer is Microsoft Word Open XML Format Document (DOCX) data. pub fn is_docx(buf: &[u8]) -> bool { msooxml(buf) == Some(DocType::DOCX) } /// Returns whether a buffer is Microsoft Excel 97-2003 Worksheet (XLS) data. pub fn is_xls(buf: &[u8]) -> bool { ole2(buf) == Some(DocType::XLS) } /// Returns whether a buffer is Microsoft Excel Open XML Format Spreadsheet (XLSX) data. pub fn is_xlsx(buf: &[u8]) -> bool { msooxml(buf) == Some(DocType::XLSX) } /// Returns whether a buffer is Microsoft PowerPoint 97-2003 Presentation (PPT) data. pub fn is_ppt(buf: &[u8]) -> bool { ole2(buf) == Some(DocType::PPT) } /// Returns whether a buffer is Microsoft PowerPoint Open XML Presentation (PPTX) data. pub fn is_pptx(buf: &[u8]) -> bool { msooxml(buf) == Some(DocType::PPTX) } fn msooxml(buf: &[u8]) -> Option { let signature = [b'P', b'K', 0x03, 0x04]; // start by checking for ZIP local file header signature if !compare_bytes(buf, &signature, 0) { return None; } let v = check_msooml(buf, 0x1E); if v.is_some() { return v; } if !compare_bytes(buf, b"[Content_Types].xml", 0x1E) && !compare_bytes(buf, b"_rels/.rels", 0x1E) && !compare_bytes(buf, b"docProps", 0x1E) { return None; } // skip to the second local file header // since some documents include a 520-byte extra field following the file // header, we need to scan for the next header let mut start_offset = match u32::from_le_bytes(buf[18..22].try_into().unwrap()).checked_add(49) { Some(int) => int as usize, None => return None, }; let idx = search(buf, start_offset, 6000)?; // now skip to the *third* local file header; again, we need to scan due to a // 520-byte extra field following the file header start_offset += idx + 4 + 26; let idx = search(buf, start_offset, 6000)?; // and check the subdirectory name to determine which type of OOXML // file we have. 
Correct the mimetype with the registered ones: // http://technet.microsoft.com/en-us/library/cc179224.aspx start_offset += idx + 4 + 26; check_msooml(buf, start_offset)?; // OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file start_offset += 26; let idx = search(buf, start_offset, 6000); match idx { Some(idx) => start_offset += idx + 4 + 26, None => return Some(DocType::OOXML), }; let typo = check_msooml(buf, start_offset); if typo.is_some() { return typo; } Some(DocType::OOXML) } #[cfg(feature = "std")] fn ole2(buf: &[u8]) -> Option { use std::io::Cursor; if !compare_bytes(buf, &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 0) { return None; } if let Ok(file) = cfb::CompoundFile::open(Cursor::new(buf)) { return match file.root_entry().clsid().to_string().as_str() { "00020810-0000-0000-c000-000000000046" | "00020820-0000-0000-c000-000000000046" => { Some(DocType::XLS) } "00020906-0000-0000-c000-000000000046" => Some(DocType::DOC), "64818d10-4f9b-11cf-86ea-00aa00b929e8" => Some(DocType::PPT), _ => None, }; } None } #[cfg(not(feature = "std"))] fn ole2(buf: &[u8]) -> Option { if !compare_bytes(buf, &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1], 0) { return None; } Some(DocType::DOC) } fn check_msooml(buf: &[u8], offset: usize) -> Option { if compare_bytes(buf, b"word/", offset) { Some(DocType::DOCX) } else if compare_bytes(buf, b"ppt/", offset) { Some(DocType::PPTX) } else if compare_bytes(buf, b"xl/", offset) { Some(DocType::XLSX) } else { None } } fn search(buf: &[u8], start: usize, range: usize) -> Option { let length = buf.len(); let mut end = start + range; let signature: &[_] = &[b'P', b'K', 0x03, 0x04]; if end > length { end = length; } if start >= end { return None; } buf[start..end] .windows(signature.len()) .position(|window| window == signature) } infer-0.19.0/src/matchers/font.rs000064400000000000000000000021111046102023000147220ustar 00000000000000/// Returns whether a buffer is WOFF font data. 
pub fn is_woff(buf: &[u8]) -> bool { buf.len() > 7 && buf[0] == 0x77 && buf[1] == 0x4F && buf[2] == 0x46 && buf[3] == 0x46 && buf[4] == 0x00 && buf[5] == 0x01 && buf[6] == 0x00 && buf[7] == 0x00 } /// Returns whether a buffer is WOFF2 font data. pub fn is_woff2(buf: &[u8]) -> bool { buf.len() > 7 && buf[0] == 0x77 && buf[1] == 0x4F && buf[2] == 0x46 && buf[3] == 0x32 && buf[4] == 0x00 && buf[5] == 0x01 && buf[6] == 0x00 && buf[7] == 0x00 } /// Returns whether a buffer is TTF font data. pub fn is_ttf(buf: &[u8]) -> bool { buf.len() > 4 && buf[0] == 0x00 && buf[1] == 0x01 && buf[2] == 0x00 && buf[3] == 0x00 && buf[4] == 0x00 } /// Returns whether a buffer is OTF font data. pub fn is_otf(buf: &[u8]) -> bool { buf.len() > 4 && buf[0] == 0x4F && buf[1] == 0x54 && buf[2] == 0x54 && buf[3] == 0x4F && buf[4] == 0x00 } infer-0.19.0/src/matchers/image.rs000064400000000000000000000141741046102023000150520ustar 00000000000000use core::convert::TryInto; /// Returns whether a buffer is JPEG image data. pub fn is_jpeg(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0xFF && buf[1] == 0xD8 && buf[2] == 0xFF } /// Returns whether a buffer is jpg2 image data. pub fn is_jpeg2000(buf: &[u8]) -> bool { buf.len() > 12 && buf[0] == 0x0 && buf[1] == 0x0 && buf[2] == 0x0 && buf[3] == 0xC && buf[4] == 0x6A && buf[5] == 0x50 && buf[6] == 0x20 && buf[7] == 0x20 && buf[8] == 0xD && buf[9] == 0xA && buf[10] == 0x87 && buf[11] == 0xA && buf[12] == 0x0 } /// Returns whether a buffer is PNG image data. pub fn is_png(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x89 && buf[1] == 0x50 && buf[2] == 0x4E && buf[3] == 0x47 } /// Returns whether a buffer is GIF image data. pub fn is_gif(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x47 && buf[1] == 0x49 && buf[2] == 0x46 } /// Returns whether a buffer is WEBP image data. 
pub fn is_webp(buf: &[u8]) -> bool {
    // Only the "WEBP" tag at offset 8 is compared.
    buf.len() > 11 && buf[8] == 0x57 && buf[9] == 0x45 && buf[10] == 0x42 && buf[11] == 0x50
}

/// Returns whether a buffer is Canon CR2 image data.
pub fn is_cr2(buf: &[u8]) -> bool {
    buf.len() > 10
        && ((buf[0] == 0x49 && buf[1] == 0x49 && buf[2] == 0x2A && buf[3] == 0x0)
            || (buf[0] == 0x4D && buf[1] == 0x4D && buf[2] == 0x0 && buf[3] == 0x2A))
        && buf[8] == 0x43
        && buf[9] == 0x52
        && buf[10] == 0x02 // CR2 major version
}

/// Returns whether a buffer is TIFF image data.
///
/// A buffer matches when it starts with either TIFF byte-order signature and
/// bytes 8..10 do not spell the `CR` marker that `is_cr2` looks for.
pub fn is_tiff(buf: &[u8]) -> bool {
    buf.len() > 9
        && ((buf[0] == 0x49 && buf[1] == 0x49 && buf[2] == 0x2A && buf[3] == 0x0)
            || (buf[0] == 0x4D && buf[1] == 0x4D && buf[2] == 0x0 && buf[3] == 0x2A))
        // To avoid conflicts differentiate Tiff from CR2: reject only when
        // *both* marker bytes are present. Testing each byte on its own would
        // also reject ordinary TIFF data that merely has 0x43 at offset 8 or
        // 0x52 at offset 9. Every buffer accepted by `is_cr2` carries the full
        // marker, so it is excluded by this test as well.
        && !(buf[8] == 0x43 && buf[9] == 0x52)
}

/// Returns whether a buffer is BMP image data.
pub fn is_bmp(buf: &[u8]) -> bool {
    buf.len() > 1 && buf[0] == 0x42 && buf[1] == 0x4D
}

/// Returns whether a buffer is jxr image data.
pub fn is_jxr(buf: &[u8]) -> bool {
    buf.len() > 2 && buf[0] == 0x49 && buf[1] == 0x49 && buf[2] == 0xBC
}

/// Returns whether a buffer is Photoshop PSD image data.
pub fn is_psd(buf: &[u8]) -> bool {
    buf.len() > 3 && buf[0] == 0x38 && buf[1] == 0x42 && buf[2] == 0x50 && buf[3] == 0x53
}

/// Returns whether a buffer is ICO icon image data.
pub fn is_ico(buf: &[u8]) -> bool {
    buf.len() > 3 && buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0x01 && buf[3] == 0x00
}

/// Returns whether a buffer is JPEG XL (JXL) image data.
pub fn is_jxl(buf: &[u8]) -> bool {
    // Two accepted forms: the two-byte 0xFF 0x0A prefix, or a twelve-byte
    // signature box containing "JXL ".
    (buf.len() > 2 && buf[0] == 0xFF && buf[1] == 0x0A)
        || (buf.len() > 12
            && buf[0] == 0x0
            && buf[1] == 0x0
            && buf[2] == 0x0
            && buf[3] == 0x0C
            && buf[4] == 0x4A
            && buf[5] == 0x58
            && buf[6] == 0x4C
            && buf[7] == 0x20
            && buf[8] == 0x0D
            && buf[9] == 0x0A
            && buf[10] == 0x87
            && buf[11] == 0x0A)
}

/// Returns whether a buffer is HEIF image data.
///
/// The buffer must begin with an ISO-BMFF `ftyp` box (see `is_isobmff`). It is
/// HEIF when the major brand is `heic` or `heix`, or when the major brand is
/// `mif1` or `msf1` and `heic` appears among the compatible brands.
pub fn is_heif(buf: &[u8]) -> bool {
    if buf.is_empty() || !is_isobmff(buf) {
        return false;
    }

    match get_ftyp(buf) {
        Some((major, _minor, mut compatible)) => {
            major == b"heic"
                || major == b"heix"
                || ((major == b"mif1" || major == b"msf1")
                    && compatible.any(|brand| brand == b"heic"))
        }
        None => false,
    }
}

/// Returns whether a buffer is AVIF image data.
///
/// The buffer must begin with an ISO-BMFF `ftyp` box (see `is_isobmff`). It is
/// AVIF when either the major brand or any compatible brand is `avif` or
/// `avis`.
pub fn is_avif(buf: &[u8]) -> bool {
    if buf.is_empty() || !is_isobmff(buf) {
        return false;
    }

    let is_avif_brand = |brand: &[u8]| brand == b"avif" || brand == b"avis";

    get_ftyp(buf).map_or(false, |(major, _minor, mut compatible)| {
        is_avif_brand(major) || compatible.any(|brand| is_avif_brand(brand))
    })
}

// Checks whether the buffer starts with an ISO Base Media File Format `ftyp`
// box: at least 16 bytes, ASCII `ftyp` in bytes 4..8, and a buffer at least
// as long as the big-endian box length stored in bytes 0..4.
fn is_isobmff(buf: &[u8]) -> bool {
    if buf.len() < 16 || &buf[4..8] != b"ftyp" {
        return false;
    }

    let declared_len = u32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as usize;
    declared_len <= buf.len()
}

/// Returns whether a buffer is OpenRaster (ORA) image data.
///
/// Matches the ZIP local-file-header signature `PK\x03\x04` at offset 0 and
/// the ASCII text `mimetypeimage/openraster` in bytes 30..54. The buffer must
/// hold at least 58 bytes.
pub fn is_ora(buf: &[u8]) -> bool {
    buf.len() > 57
        && buf.starts_with(&[0x50, 0x4B, 0x03, 0x04])
        && &buf[30..54] == b"mimetypeimage/openraster"
}

/// Returns whether a buffer is DjVu image data.
pub fn is_djvu(buf: &[u8]) -> bool {
    // Signature: ASCII `AT&TFORM` in bytes 0..8, four unchecked bytes
    // (8..12), then ASCII `DJV` in bytes 12..15. Needs at least 15 bytes.
    buf.len() > 14
        && buf[0] == 0x41
        && buf[1] == 0x54
        && buf[2] == 0x26
        && buf[3] == 0x54
        && buf[4] == 0x46
        && buf[5] == 0x4F
        && buf[6] == 0x52
        && buf[7] == 0x4D
        && buf[12] == 0x44
        && buf[13] == 0x4A
        && buf[14] == 0x56
}

// GetFtyp returns the major brand, minor version and compatible brands of the ISO-BMFF data
//
// Returns `None` when `buf` is shorter than 16 bytes. Otherwise:
// * major brand   = bytes 8..12;
// * minor version = bytes 12..16;
// * compatible brands = successive 4-byte chunks starting at byte 16, capped
//   at `ftyp_length / 4 - 4` chunks (saturating at zero), where `ftyp_length`
//   is the big-endian u32 stored in bytes 0..4.
// NOTE(review): the return type reads `impl Iterator` with no `Item` bound,
// yet callers compare the yielded values with byte strings — presumably a
// generic argument was lost when this text was extracted; confirm against the
// upstream file.
fn get_ftyp(buf: &[u8]) -> Option<(&[u8], &[u8], impl Iterator)> {
    if buf.len() < 16 {
        return None;
    }

    // The conversion cannot fail: the slice is exactly four bytes long.
    let ftyp_length = u32::from_be_bytes(buf[0..4].try_into().unwrap()) as usize;

    let major = &buf[8..12];
    let minor = &buf[12..16];
    let compatible = buf[16..]
        .chunks_exact(4)
        .take((ftyp_length / 4).saturating_sub(16 / 4));

    Some((major, minor, compatible))
}

// NOTE(review): the next line is the archive (tar) member header for
// src/matchers/mod.rs fused with that file's first statement; kept verbatim.
infer-0.19.0/src/matchers/mod.rs000064400000000000000000000010051046102023000145340ustar 00000000000000pub mod app;
pub mod archive;
pub mod audio;
pub mod book;
pub mod doc;
pub mod font;
pub mod image;
pub mod odf;
pub mod text;
pub mod video;

/// Returns whether `sub_slice` occurs in `slice` starting exactly at
/// `start_offset`.
///
/// Returns `false` when `sub_slice` does not fit inside `slice` at that
/// offset, or when any byte differs. An empty `sub_slice` matches at any
/// offset that is not past the end of `slice`.
// NOTE(review): `start_offset + sl` is a plain `usize` addition, so an
// extremely large offset would overflow (panic in debug builds, wrap in
// release). The calls visible in this file only pass small constants.
pub(crate) fn compare_bytes(slice: &[u8], sub_slice: &[u8], start_offset: usize) -> bool {
    let sl = sub_slice.len();

    if start_offset + sl > slice.len() {
        return false;
    }

    // Walk the `sl` bytes of `slice` that start at `start_offset` and compare
    // each with the byte at the same position in `sub_slice`.
    for (i, v) in slice.iter().skip(start_offset).take(sl).enumerate() {
        let v2 = sub_slice[i];

        if *v != v2 {
            return false;
        }
    }

    true
}

// NOTE(review): the next line is the archive (tar) member header for
// src/matchers/odf.rs fused with that file's first statement; kept verbatim.
infer-0.19.0/src/matchers/odf.rs000064400000000000000000000022721046102023000145340ustar 00000000000000use super::compare_bytes;

// The OpenDocument flavours that `odf` can report.
#[derive(Debug, Eq, PartialEq)]
enum DocType {
    Text,
    Spreadsheet,
    Presentation,
}

/// Returns whether a buffer is OpenDocument Text
pub fn is_odt(buf: &[u8]) -> bool {
    odf(buf) == Some(DocType::Text)
}

/// Returns whether a buffer is OpenDocument Spreadsheet
pub fn is_ods(buf: &[u8]) -> bool {
    odf(buf) == Some(DocType::Spreadsheet)
}

/// Returns whether a buffer is OpenDocument Presentation
pub fn is_odp(buf: &[u8]) -> bool {
    odf(buf) == Some(DocType::Presentation)
}

// Classifies a buffer as one of the `DocType` variants, or returns `None`.
//
// Three fixed-offset checks are made: the ZIP local-file-header signature
// `PK\x03\x04` at offset 0, the ASCII text `mimetype` at offset 0x1E, and one
// of the `vnd.oasis.opendocument.*` strings at offset 0x32. Only the listed
// bytes are compared, so a longer string that begins with one of them also
// matches; the candidates are tried in the order text, spreadsheet,
// presentation.
// NOTE(review): the return type reads `Option` with no type argument, while
// the body returns `Some(DocType::…)` — presumably `<DocType>` was lost when
// this text was extracted; confirm against the upstream file.
fn odf(buf: &[u8]) -> Option {
    let signature = [b'P', b'K', 0x03, 0x04];

    // start by checking for ZIP local file header signature
    if !compare_bytes(buf, &signature, 0) {
        return None;
    }

    // Check mimetype
    if !compare_bytes(buf, b"mimetype", 0x1E) {
        return None;
    }

    if compare_bytes(buf, b"vnd.oasis.opendocument.text", 0x32) {
        return Some(DocType::Text);
    }

    if compare_bytes(buf, b"vnd.oasis.opendocument.spreadsheet", 0x32) {
        return Some(DocType::Spreadsheet);
    }

    if compare_bytes(buf, b"vnd.oasis.opendocument.presentation", 0x32) {
        return Some(DocType::Presentation);
    }

    None
}

// NOTE(review): the next line is the archive (tar) member header for
// src/matchers/text.rs fused with that file's first doc comment; kept verbatim.
infer-0.19.0/src/matchers/text.rs000064400000000000000000000055611046102023000147540ustar 00000000000000/// Returns whether a buffer is html data.
///
/// Conforms to [whatwg](https://mimesniff.spec.whatwg.org/)
/// specification.
// NOTE(review): the body below is incomplete as seen here — everything
// between the opening `b"` and ` val.len() {` is missing (it looks like text
// between a `<` and a later `>` was stripped during extraction). The line is
// kept exactly as found; restore it from the upstream file before building.
pub fn is_html(buf: &[u8]) -> bool { let values: &[&[u8]] = &[ b" val.len() { match buf[val.len()] { // tag-terminitating byte 0x20 | 0x3E => return true, _ => continue, } } } false }

/// Returns whether a buffer is xml data.
///
/// Conforms to [whatwg](https://mimesniff.spec.whatwg.org/)
/// specification.
// NOTE(review): the line below is also truncated — the rest of `is_xml` and
// the header of the function that follows it (presumably
// `trim_start_whitespaces`, which the tests import) are missing between `b"`
// and ` &[u8] {`. What remains is a loop that drops leading 0x09, 0x0A, 0x0C,
// 0x0D and 0x20 bytes from `buf`. Kept exactly as found.
pub fn is_xml(buf: &[u8]) -> bool { let val: &[u8] = b" &[u8] { while !buf.is_empty() { match buf[0] { 0x09 | 0x0A | 0x0C | 0x0D | 0x20 => buf = &buf[1..], _ => break, } } buf }

/// Strip BOM at the beginning of the buffer.
///
/// Repeatedly removes a leading UTF-8 BOM (`EF BB BF`) or UTF-16 BOM
/// (`FE FF` / `FF FE`) and returns the remaining slice. The loop only runs
/// while at least three bytes remain, so a buffer of fewer than three bytes
/// is returned unchanged.
fn trim_start_byte_order_marks(mut buf: &[u8]) -> &[u8] {
    while buf.len() >= 3 {
        match (buf[0], buf[1], buf[2]) {
            (0xEF, 0xBB, 0xBF) => buf = &buf[3..], // UTF-8
            (0xFE, 0xFF, _) => buf = &buf[2..],    // UTF-16 BE
            (0xFF, 0xFE, _) => buf = &buf[2..],    // UTF-16 LE
            _ => break,
        }
    }
    buf
}

// Returns whether `buf` begins with `needle`, comparing ASCII letters
// case-insensitively. False when `buf` is shorter than `needle`.
fn starts_with_ignore_ascii_case(buf: &[u8], needle: &[u8]) -> bool {
    buf.len() >= needle.len() && buf[..needle.len()].eq_ignore_ascii_case(needle)
}

/// Returns whether a buffer is a shell script.
///
/// Matches a leading `#!` that is followed by at least one more byte; a
/// buffer consisting of `#!` alone does not match.
pub fn is_shellscript(buf: &[u8]) -> bool {
    buf.len() > 2 && &buf[..2] == b"#!"
}

#[cfg(test)]
mod tests {
    use super::{is_html, is_shellscript, trim_start_whitespaces};

    // Leading tab, LF, FF, CR and space bytes are removed; other input is
    // returned unchanged.
    #[test]
    fn trim_whitespaces() {
        let got = trim_start_whitespaces(&[0x09, 0x0A, 0x0C, 0x0D, 0x20, b'A', b'B', b'C']);
        assert_eq!(got, b"ABC");
        let got = trim_start_whitespaces(b"abc");
        assert_eq!(got, b"abc");
        let got = trim_start_whitespaces(&[]);
        assert_eq!(got, &[]);
    }

    #[test]
    fn html() {
        assert!(!is_html(b"<"));
        assert!(!is_html(b""));
    }

    // A bare `#!` with nothing after it is not reported as a shell script.
    #[test]
    fn shellscript() {
        assert!(!is_shellscript(b"#!"));
    }
}

// NOTE(review): the next line is the archive (tar) member header for
// src/matchers/video.rs fused with that file's first doc comment; kept verbatim.
infer-0.19.0/src/matchers/video.rs000064400000000000000000000126441046102023000150760ustar 00000000000000/// Returns whether a buffer is M4V video data.
///
/// Matches ASCII `ftyp` in bytes 4..8 followed by ASCII `M4V` in bytes
/// 8..11. Buffers shorter than 11 bytes never match.
pub fn is_m4v(buf: &[u8]) -> bool {
    buf.len() > 10
        && buf[4] == 0x66
        && buf[5] == 0x74
        && buf[6] == 0x79
        && buf[7] == 0x70
        && buf[8] == 0x4D
        && buf[9] == 0x34
        && buf[10] == 0x56
}

/// Returns whether a buffer is MKV video data.
///
/// Two signatures are accepted:
/// * bytes 0..8 equal to `1A 45 DF A3 93 42 82 88` followed by ASCII
///   `matroska` in bytes 8..16;
/// * ASCII `matroska` in bytes 31..39, with no check on the leading bytes.
pub fn is_mkv(buf: &[u8]) -> bool {
    (buf.len() > 15
        && buf[0] == 0x1A
        && buf[1] == 0x45
        && buf[2] == 0xDF
        && buf[3] == 0xA3
        && buf[4] == 0x93
        && buf[5] == 0x42
        && buf[6] == 0x82
        && buf[7] == 0x88
        && buf[8] == 0x6D
        && buf[9] == 0x61
        && buf[10] == 0x74
        && buf[11] == 0x72
        && buf[12] == 0x6F
        && buf[13] == 0x73
        && buf[14] == 0x6B
        && buf[15] == 0x61)
        || (buf.len() > 38
            && buf[31] == 0x6D
            && buf[32] == 0x61
            && buf[33] == 0x74
            && buf[34] == 0x72
            && buf[35] == 0x6f
            && buf[36] == 0x73
            && buf[37] == 0x6B
            && buf[38] == 0x61)
}

/// Returns whether a buffer is WEBM video data.
///
/// Only the four leading bytes `1A 45 DF A3` are checked. These are the same
/// four bytes that begin the first `is_mkv` signature, so any buffer that
/// matches that signature also matches here.
pub fn is_webm(buf: &[u8]) -> bool {
    buf.len() > 3
        && buf[0] == 0x1A
        && buf[1] == 0x45
        && buf[2] == 0xDF
        && buf[3] == 0xA3
}

/// Returns whether a buffer is Quicktime MOV video data.
///
/// Requires at least 16 bytes and any one of:
/// * ASCII `ftyp` in bytes 4..8 with the brand `qt  ` (two trailing spaces)
///   in bytes 8..12;
/// * ASCII `moov` in bytes 4..8;
/// * ASCII `mdat` in bytes 4..8;
/// * ASCII `mdat` in bytes 12..16.
pub fn is_mov(buf: &[u8]) -> bool {
    if buf.len() <= 15 {
        return false;
    }

    let box_type = &buf[4..8];

    (box_type == b"ftyp" && &buf[8..12] == b"qt  ")
        || box_type == b"moov"
        || box_type == b"mdat"
        || &buf[12..16] == b"mdat"
}

/// Returns whether a buffer is AVI video data.
///
/// Matches ASCII `RIFF` in bytes 0..4 and ASCII `AVI` in bytes 8..11.
/// Buffers shorter than 11 bytes never match.
pub fn is_avi(buf: &[u8]) -> bool {
    buf.len() > 10 && buf.starts_with(b"RIFF") && &buf[8..11] == b"AVI"
}

/// Returns whether a buffer is WMV video data.
///
/// Matches the ten leading bytes `30 26 B2 75 8E 66 CF 11 A6 D9`.
pub fn is_wmv(buf: &[u8]) -> bool {
    const SIGNATURE: [u8; 10] = [0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9];

    buf.starts_with(&SIGNATURE)
}

/// Returns whether a buffer is MPEG video data.
///
/// Matches the start-code prefix `00 00 01` followed by a byte in the range
/// `0xB0..=0xBF`.
pub fn is_mpeg(buf: &[u8]) -> bool {
    // A byte lies in 0xB0..=0xBF exactly when its high nibble is 0xB.
    buf.len() > 3 && buf.starts_with(&[0x00, 0x00, 0x01]) && (buf[3] & 0xF0) == 0xB0
}

/// Returns whether a buffer is FLV video data.
///
/// Matches ASCII `FLV` followed by the byte `0x01`.
pub fn is_flv(buf: &[u8]) -> bool {
    buf.starts_with(b"FLV\x01")
}

/// Returns whether a buffer is MP4 video data.
///
/// Requires at least 12 bytes, ASCII `ftyp` in bytes 4..8, and a major brand
/// in bytes 8..12 that appears in the fixed brand table inside the function.
pub fn is_mp4(buf: &[u8]) -> bool {
    // Major brands accepted as MP4; each entry is exactly four bytes
    // (`F4V ` and `F4P ` end with a space).
    const MP4_BRANDS: [&[u8]; 28] = [
        b"avc1", b"dash", b"iso2", b"iso3", b"iso4", b"iso5", b"iso6", b"isom",
        b"mmp4", b"mp41", b"mp42", b"mp4v", b"mp71", b"MSNV", b"NDAS", b"NDSC",
        b"NSDC", b"NDSH", b"NDSM", b"NDSP", b"NDSS", b"NDXC", b"NDXH", b"NDXM",
        b"NDXP", b"NDXS", b"F4V ", b"F4P ",
    ];

    if buf.len() <= 11 || &buf[4..8] != b"ftyp" {
        return false;
    }

    let major_brand = &buf[8..12];
    MP4_BRANDS.iter().any(|&brand| brand == major_brand)
}