memchr-2.4.1/.cargo_vcs_info.json0000644000000001120000000000100123050ustar { "git": { "sha1": "8e1da98fee06d66c13e66c330e3a3dd6ccf0e3a0" } } memchr-2.4.1/.gitignore000064400000000000000000000001360072674642500131230ustar 00000000000000.*.swp doc tags examples/ss10pusa.csv build target /Cargo.lock scratch* bench_large/huge tmp/ memchr-2.4.1/.ignore000064400000000000000000000000110072674642500124070ustar 00000000000000!.github memchr-2.4.1/COPYING000064400000000000000000000001760072674642500121720ustar 00000000000000This project is dual-licensed under the Unlicense and MIT licenses. You may use this code under the terms of either license. memchr-2.4.1/Cargo.toml0000644000000027070000000000100103170ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "memchr" version = "2.4.1" authors = ["Andrew Gallant ", "bluss"] exclude = ["/bench", "/.github", "/fuzz"] description = "Safe interface to memchr." homepage = "https://github.com/BurntSushi/memchr" documentation = "https://docs.rs/memchr/" readme = "README.md" keywords = ["memchr", "char", "scan", "strchr", "string"] license = "Unlicense/MIT" repository = "https://github.com/BurntSushi/memchr" [profile.bench] debug = true [profile.release] debug = true [profile.test] opt-level = 3 debug = true [lib] name = "memchr" bench = false [dependencies.compiler_builtins] version = "0.1.2" optional = true [dependencies.core] version = "1.0.0" optional = true package = "rustc-std-workspace-core" [dependencies.libc] version = "0.2.18" optional = true default-features = false [dev-dependencies.quickcheck] version = "1.0.3" default-features = false [features] default = ["std"] rustc-dep-of-std = ["core", "compiler_builtins"] std = [] use_std = ["std"] memchr-2.4.1/Cargo.toml.orig000064400000000000000000000032220072674642500140210ustar 00000000000000[package] name = "memchr" version = "2.4.1" #:version authors = ["Andrew Gallant ", "bluss"] description = "Safe interface to memchr." documentation = "https://docs.rs/memchr/" homepage = "https://github.com/BurntSushi/memchr" repository = "https://github.com/BurntSushi/memchr" readme = "README.md" keywords = ["memchr", "char", "scan", "strchr", "string"] license = "Unlicense/MIT" exclude = ["/bench", "/.github", "/fuzz"] edition = "2018" [workspace] members = ["bench"] [lib] name = "memchr" bench = false [features] default = ["std"] # The 'std' feature permits the memchr crate to use the standard library. This # permits this crate to use runtime CPU feature detection to automatically # accelerate searching via vector instructions. Without the standard library, # this automatic detection is not possible. std = [] # The 'use_std' feature is DEPRECATED. It will be removed in memchr 3. Until # then, it is alias for the 'std' feature. use_std = ["std"] # Internal feature, only used when building as part of libstd, not part of the # stable interface of this crate. 
rustc-dep-of-std = ['core', 'compiler_builtins'] [dependencies] libc = { version = "0.2.18", default-features = false, optional = true } # Internal feature, only used when building as part of libstd, not part of the # stable interface of this crate. core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core' } compiler_builtins = { version = '0.1.2', optional = true } [dev-dependencies] quickcheck = { version = "1.0.3", default-features = false } [profile.release] debug = true [profile.bench] debug = true [profile.test] opt-level = 3 debug = true memchr-2.4.1/LICENSE-MIT000064400000000000000000000020710072674642500125670ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Andrew Gallant Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. memchr-2.4.1/README.md000064400000000000000000000105630072674642500124170ustar 00000000000000memchr ====== This library provides heavily optimized routines for string search primitives. [![Build status](https://github.com/BurntSushi/memchr/workflows/ci/badge.svg)](https://github.com/BurntSushi/memchr/actions) [![](https://meritbadge.herokuapp.com/memchr)](https://crates.io/crates/memchr) Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation [https://docs.rs/memchr](https://docs.rs/memchr) ### Overview * The top-level module provides routines for searching for 1, 2 or 3 bytes in the forward or reverse direction. When searching for more than one byte, positions are considered a match if the byte at that position matches any of the bytes. * The `memmem` sub-module provides forward and reverse substring search routines. In all such cases, routines operate on `&[u8]` without regard to encoding. This is exactly what you want when searching either UTF-8 or arbitrary bytes. ### Compiling without the standard library memchr links to the standard library by default, but you can disable the `std` feature if you want to use it in a `#![no_std]` crate: ```toml [dependencies] memchr = { version = "2", default-features = false } ``` On x86 platforms, when the `std` feature is disabled, the SSE2 accelerated implementations will be used. When `std` is enabled, AVX accelerated implementations will be used if the CPU is determined to support it at runtime. ### Using libc `memchr` is a routine that is part of libc, although this crate does not use libc by default. Instead, it uses its own routines, which are either vectorized or generic fallback routines. 
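If libc's implementation is preferred, it can be opted into by enabling this
crate's `libc` feature (described below). A sketch of what that looks like in
a downstream `Cargo.toml`:

```toml
[dependencies]
memchr = { version = "2", features = ["libc"] }
```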
In general, these should be competitive with what's in libc, although this has not been tested for all architectures. If using `memchr` from libc is desirable and a vectorized routine is not otherwise available in this crate, then enabling the `libc` feature will use libc's version of `memchr`. The rest of the functions in this crate, e.g., `memchr2` or `memrchr3` and the substring search routines, will always use the implementations in this crate. One exception to this is `memrchr`, which is an extension in `libc` found on Linux. On Linux, `memrchr` is used in precisely the same scenario as `memchr`, as described above. ### Minimum Rust version policy This crate's minimum supported `rustc` version is `1.41.1`. The current policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if `crate 1.0` requires Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust 1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum version of Rust. In general, this crate will be conservative with respect to the minimum supported version of Rust. ### Testing strategy Given the complexity of the code in this crate, along with the pervasive use of `unsafe`, this crate has an extensive testing strategy. It combines multiple approaches: * Hand-written tests. * Exhaustive-style testing meant to exercise all possible branching and offset calculations. * Property based testing through [`quickcheck`](https://github.com/BurntSushi/quickcheck). * Fuzz testing through [`cargo fuzz`](https://github.com/rust-fuzz/cargo-fuzz). * A huge suite of benchmarks that are also run as tests. Benchmarks always confirm that the expected result occurs. Improvements to the testing infrastructure are very welcome. ### Algorithms used At time of writing, this crate's implementation of substring search actually has a few different algorithms to choose from depending on the situation. * For very small haystacks, [Rabin-Karp](https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm) is used to reduce latency. Rabin-Karp has very small overhead and can often complete before other searchers have even been constructed. * For small needles, a variant of the ["Generic SIMD"](http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd) algorithm is used. Instead of using the first and last bytes, a heuristic is used to select bytes based on a background distribution of byte frequencies. * In all other cases, [Two-Way](https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm) is used. If possible, a prefilter based on the "Generic SIMD" algorithm linked above is used to find candidates quickly. A dynamic heuristic is used to detect if the prefilter is ineffective, and if so, disables it. memchr-2.4.1/UNLICENSE000064400000000000000000000022730072674642500124070ustar 00000000000000This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. 
We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to memchr-2.4.1/build.rs000064400000000000000000000043340072674642500126040ustar 00000000000000use std::env; fn main() { enable_simd_optimizations(); enable_libc(); } // This adds various simd cfgs if this compiler and target support it. // // This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but // this is generally only intended for testing. // // On targets which don't feature SSE2, this is disabled, as LLVM wouln't know // how to work with SSE2 operands. Enabling SSE4.2 and AVX on SSE2-only targets // is not a problem. In that case, the fastest option will be chosen at // runtime. fn enable_simd_optimizations() { if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") || !target_has_feature("sse2") { return; } println!("cargo:rustc-cfg=memchr_runtime_simd"); println!("cargo:rustc-cfg=memchr_runtime_sse2"); println!("cargo:rustc-cfg=memchr_runtime_sse42"); println!("cargo:rustc-cfg=memchr_runtime_avx"); } // This adds a `memchr_libc` cfg if and only if libc can be used, if no other // better option is available. // // This could be performed in the source code, but it's simpler to do it once // here and consolidate it into one cfg knob. // // Basically, we use libc only if its enabled and if we aren't targeting a // known bad platform. For example, wasm32 doesn't have a libc and the // performance of memchr on Windows is seemingly worse than the fallback // implementation. fn enable_libc() { const NO_ARCH: &'static [&'static str] = &["wasm32", "windows"]; const NO_ENV: &'static [&'static str] = &["sgx"]; if !is_feature_set("LIBC") { return; } let arch = match env::var("CARGO_CFG_TARGET_ARCH") { Err(_) => return, Ok(arch) => arch, }; let env = match env::var("CARGO_CFG_TARGET_ENV") { Err(_) => return, Ok(env) => env, }; if NO_ARCH.contains(&&*arch) || NO_ENV.contains(&&*env) { return; } println!("cargo:rustc-cfg=memchr_libc"); } fn is_feature_set(name: &str) -> bool { is_env_set(&format!("CARGO_FEATURE_{}", name)) } fn is_env_set(name: &str) -> bool { env::var_os(name).is_some() } fn target_has_feature(feature: &str) -> bool { env::var("CARGO_CFG_TARGET_FEATURE") .map(|features| features.contains(feature)) .unwrap_or(false) } memchr-2.4.1/rustfmt.toml000064400000000000000000000000540072674642500135330ustar 00000000000000max_width = 79 use_small_heuristics = "max" memchr-2.4.1/scripts/make-byte-frequency-table000075500000000000000000000037660072674642500175260ustar 00000000000000#!/usr/bin/env python # This does simple normalized frequency analysis on UTF-8 encoded text. The # result of the analysis is translated to a ranked list, where every byte is # assigned a rank. This list is written to src/freqs.rs. 
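# (For context: the BYTE_FREQUENCIES table generated here is consulted by the
# substring search code as a rough "how common is this byte" signal when
# choosing prefilter bytes; see the "Algorithms used" section of the README.)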
# # Currently, the frequencies are generated from the following corpuses: # # * The CIA world fact book # * The source code of rustc # * Septuaginta from __future__ import absolute_import, division, print_function import argparse from collections import Counter import sys preamble = ''' // NOTE: The following code was generated by "scripts/frequencies.py", do not // edit directly '''.lstrip() def eprint(*args, **kwargs): kwargs['file'] = sys.stderr print(*args, **kwargs) def main(): p = argparse.ArgumentParser() p.add_argument('corpus', metavar='FILE', nargs='+') args = p.parse_args() # Get frequency counts of each byte. freqs = Counter() for i in range(0, 256): freqs[i] = 0 eprint('reading entire corpus into memory') corpus = [] for fpath in args.corpus: corpus.append(open(fpath, 'rb').read()) eprint('computing byte frequencies') for c in corpus: for byte in c: freqs[byte] += 1.0 / float(len(c)) eprint('writing Rust code') # Get the rank of each byte. A lower rank => lower relative frequency. rank = [0] * 256 for i, (byte, _) in enumerate(freqs.most_common()): # print(byte) rank[byte] = 255 - i # Forcefully set the highest rank possible for bytes that start multi-byte # UTF-8 sequences. The idea here is that a continuation byte will be more # discerning in a homogenous haystack. for byte in range(0xC0, 0xFF + 1): rank[byte] = 255 # Now write Rust. olines = ['pub const BYTE_FREQUENCIES: [u8; 256] = ['] for byte in range(256): olines.append(' %3d, // %r' % (rank[byte], chr(byte))) olines.append('];') print(preamble) print('\n'.join(olines)) if __name__ == '__main__': main() memchr-2.4.1/src/cow.rs000064400000000000000000000050670072674642500130700ustar 00000000000000use core::ops; /// A specialized copy-on-write byte string. /// /// The purpose of this type is to permit usage of a "borrowed or owned /// byte string" in a way that keeps std/no-std compatibility. That is, in /// no-std mode, this type devolves into a simple &[u8] with no owned variant /// available. We can't just use a plain Cow because Cow is not in core. #[derive(Clone, Debug)] pub struct CowBytes<'a>(Imp<'a>); // N.B. We don't use std::borrow::Cow here since we can get away with a // Box<[u8]> for our use case, which is 1/3 smaller than the Vec that // a Cow<[u8]> would use. #[cfg(feature = "std")] #[derive(Clone, Debug)] enum Imp<'a> { Borrowed(&'a [u8]), Owned(Box<[u8]>), } #[cfg(not(feature = "std"))] #[derive(Clone, Debug)] struct Imp<'a>(&'a [u8]); impl<'a> ops::Deref for CowBytes<'a> { type Target = [u8]; #[inline(always)] fn deref(&self) -> &[u8] { self.as_slice() } } impl<'a> CowBytes<'a> { /// Create a new borrowed CowBytes. #[inline(always)] pub fn new>(bytes: &'a B) -> CowBytes<'a> { CowBytes(Imp::new(bytes.as_ref())) } /// Create a new owned CowBytes. #[cfg(feature = "std")] #[inline(always)] pub fn new_owned(bytes: Box<[u8]>) -> CowBytes<'static> { CowBytes(Imp::Owned(bytes)) } /// Return a borrowed byte string, regardless of whether this is an owned /// or borrowed byte string internally. #[inline(always)] pub fn as_slice(&self) -> &[u8] { self.0.as_slice() } /// Return an owned version of this copy-on-write byte string. /// /// If this is already an owned byte string internally, then this is a /// no-op. Otherwise, the internal byte string is copied. 
#[cfg(feature = "std")] #[inline(always)] pub fn into_owned(self) -> CowBytes<'static> { match self.0 { Imp::Borrowed(b) => CowBytes::new_owned(Box::from(b)), Imp::Owned(b) => CowBytes::new_owned(b), } } } impl<'a> Imp<'a> { #[cfg(feature = "std")] #[inline(always)] pub fn new(bytes: &'a [u8]) -> Imp<'a> { Imp::Borrowed(bytes) } #[cfg(not(feature = "std"))] #[inline(always)] pub fn new(bytes: &'a [u8]) -> Imp<'a> { Imp(bytes) } #[cfg(feature = "std")] #[inline(always)] pub fn as_slice(&self) -> &[u8] { match self { Imp::Owned(ref x) => x, Imp::Borrowed(x) => x, } } #[cfg(not(feature = "std"))] #[inline(always)] pub fn as_slice(&self) -> &[u8] { self.0 } } memchr-2.4.1/src/lib.rs000064400000000000000000000146730072674642500130510ustar 00000000000000/*! This library provides heavily optimized routines for string search primitives. # Overview This section gives a brief high level overview of what this crate offers. * The top-level module provides routines for searching for 1, 2 or 3 bytes in the forward or reverse direction. When searching for more than one byte, positions are considered a match if the byte at that position matches any of the bytes. * The [`memmem`] sub-module provides forward and reverse substring search routines. In all such cases, routines operate on `&[u8]` without regard to encoding. This is exactly what you want when searching either UTF-8 or arbitrary bytes. # Example: using `memchr` This example shows how to use `memchr` to find the first occurrence of `z` in a haystack: ``` use memchr::memchr; let haystack = b"foo bar baz quuz"; assert_eq!(Some(10), memchr(b'z', haystack)); ``` # Example: matching one of three possible bytes This examples shows how to use `memrchr3` to find occurrences of `a`, `b` or `c`, starting at the end of the haystack. ``` use memchr::memchr3_iter; let haystack = b"xyzaxyzbxyzc"; let mut it = memchr3_iter(b'a', b'b', b'c', haystack).rev(); assert_eq!(Some(11), it.next()); assert_eq!(Some(7), it.next()); assert_eq!(Some(3), it.next()); assert_eq!(None, it.next()); ``` # Example: iterating over substring matches This example shows how to use the [`memmem`] sub-module to find occurrences of a substring in a haystack. ``` use memchr::memmem; let haystack = b"foo bar foo baz foo"; let mut it = memmem::find_iter(haystack, "foo"); assert_eq!(Some(0), it.next()); assert_eq!(Some(8), it.next()); assert_eq!(Some(16), it.next()); assert_eq!(None, it.next()); ``` # Example: repeating a search for the same needle It may be possible for the overhead of constructing a substring searcher to be measurable in some workloads. In cases where the same needle is used to search many haystacks, it is possible to do construction once and thus to avoid it for subsequent searches. This can be done with a [`memmem::Finder`]: ``` use memchr::memmem; let finder = memmem::Finder::new("foo"); assert_eq!(Some(4), finder.find(b"baz foo quux")); assert_eq!(None, finder.find(b"quux baz bar")); ``` # Why use this crate? At first glance, the APIs provided by this crate might seem weird. Why provide a dedicated routine like `memchr` for something that could be implemented clearly and trivially in one line: ``` fn memchr(needle: u8, haystack: &[u8]) -> Option { haystack.iter().position(|&b| b == needle) } ``` Or similarly, why does this crate provide substring search routines when Rust's core library already provides them? ``` fn search(haystack: &str, needle: &str) -> Option { haystack.find(needle) } ``` The primary reason for both of them to exist is performance. 
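Both of the one-liners above are functionally correct; what this crate buys
you is speed. As an illustrative check that uses nothing beyond the public
API, the optimized routine agrees with the naive one:

```
use memchr::memchr;

fn naive(needle: u8, haystack: &[u8]) -> Option<usize> {
    haystack.iter().position(|&b| b == needle)
}

let haystack = b"foo bar baz quuz";
assert_eq!(naive(b'z', haystack), memchr(b'z', haystack));
assert_eq!(naive(b'y', haystack), memchr(b'y', haystack));
```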
When it comes to performance, at a high level at least, there are two primary ways to look at it: * **Throughput**: For this, think about it as, "given some very large haystack and a byte that never occurs in that haystack, how long does it take to search through it and determine that it, in fact, does not occur?" * **Latency**: For this, think about it as, "given a tiny haystack---just a few bytes---how long does it take to determine if a byte is in it?" The `memchr` routine in this crate has _slightly_ worse latency than the solution presented above, however, its throughput can easily be over an order of magnitude faster. This is a good general purpose trade off to make. You rarely lose, but often gain big. **NOTE:** The name `memchr` comes from the corresponding routine in libc. A key advantage of using this library is that its performance is not tied to its quality of implementation in the libc you happen to be using, which can vary greatly from platform to platform. But what about substring search? This one is a bit more complicated. The primary reason for its existence is still indeed performance, but it's also useful because Rust's core library doesn't actually expose any substring search routine on arbitrary bytes. The only substring search routine that exists works exclusively on valid UTF-8. So if you have valid UTF-8, is there a reason to use this over the standard library substring search routine? Yes. This routine is faster on almost every metric, including latency. The natural question then, is why isn't this implementation in the standard library, even if only for searching on UTF-8? The reason is that the implementation details for using SIMD in the standard library haven't quite been worked out yet. **NOTE:** Currently, only `x86_64` targets have highly accelerated implementations of substring search. For `memchr`, all targets have somewhat-accelerated implementations, while only `x86_64` targets have highly accelerated implementations. This limitation is expected to be lifted once the standard library exposes a platform independent SIMD API. # Crate features * **std** - When enabled (the default), this will permit this crate to use features specific to the standard library. Currently, the only thing used from the standard library is runtime SIMD CPU feature detection. This means that this feature must be enabled to get AVX accelerated routines. When `std` is not enabled, this crate will still attempt to use SSE2 accelerated routines on `x86_64`. * **libc** - When enabled (**not** the default), this library will use your platform's libc implementation of `memchr` (and `memrchr` on Linux). This can be useful on non-`x86_64` targets where the fallback implementation in this crate is not as good as the one found in your libc. All other routines (e.g., `memchr[23]` and substring search) unconditionally use the implementation in this crate. */ #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] // It's not worth trying to gate all code on just miri, so turn off relevant // dead code warnings. #![cfg_attr(miri, allow(dead_code, unused_macros))] // Supporting 8-bit (or others) would be fine. 
If you need it, please submit a // bug report at https://github.com/BurntSushi/memchr #[cfg(not(any( target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64" )))] compile_error!("memchr currently not supported on non-{16,32,64}"); pub use crate::memchr::{ memchr, memchr2, memchr2_iter, memchr3, memchr3_iter, memchr_iter, memrchr, memrchr2, memrchr2_iter, memrchr3, memrchr3_iter, memrchr_iter, Memchr, Memchr2, Memchr3, }; mod cow; mod memchr; pub mod memmem; #[cfg(test)] mod tests; memchr-2.4.1/src/memchr/c.rs000064400000000000000000000022760072674642500137740ustar 00000000000000// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU // extension). #![allow(dead_code)] use libc::{c_int, c_void, size_t}; pub fn memchr(needle: u8, haystack: &[u8]) -> Option { // SAFETY: This is safe to call since all pointers are valid. let p = unsafe { libc::memchr( haystack.as_ptr() as *const c_void, needle as c_int, haystack.len() as size_t, ) }; if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } } // memrchr is a GNU extension. We know it's available on Linux at least. #[cfg(target_os = "linux")] pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { // GNU's memrchr() will - unlike memchr() - error if haystack is empty. if haystack.is_empty() { return None; } // SAFETY: This is safe to call since all pointers are valid. let p = unsafe { libc::memrchr( haystack.as_ptr() as *const c_void, needle as c_int, haystack.len() as size_t, ) }; if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } } memchr-2.4.1/src/memchr/fallback.rs000064400000000000000000000256660072674642500153210ustar 00000000000000// This module defines pure Rust platform independent implementations of all // the memchr routines. We do our best to make them fast. Some of them may even // get auto-vectorized. use core::{cmp, usize}; #[cfg(target_pointer_width = "16")] const USIZE_BYTES: usize = 2; #[cfg(target_pointer_width = "32")] const USIZE_BYTES: usize = 4; #[cfg(target_pointer_width = "64")] const USIZE_BYTES: usize = 8; // The number of bytes to loop at in one iteration of memchr/memrchr. const LOOP_SIZE: usize = 2 * USIZE_BYTES; /// Return `true` if `x` contains any zero byte. /// /// From *Matters Computational*, J. Arndt /// /// "The idea is to subtract one from each of the bytes and then look for /// bytes where the borrow propagated all the way to the most significant /// bit." #[inline(always)] fn contains_zero_byte(x: usize) -> bool { const LO_U64: u64 = 0x0101010101010101; const HI_U64: u64 = 0x8080808080808080; const LO_USIZE: usize = LO_U64 as usize; const HI_USIZE: usize = HI_U64 as usize; x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 } /// Repeat the given byte into a word size number. That is, every 8 bits /// is equivalent to the given byte. For example, if `b` is `\x4E` or /// `01001110` in binary, then the returned value on a 32-bit system would be: /// `01001110_01001110_01001110_01001110`. 
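// (Illustration: on a 64-bit target, repeat_byte(0x4E) is 0x4E4E4E4E4E4E4E4E.
// The loops below combine this with contains_zero_byte: the expression
// contains_zero_byte(chunk ^ repeat_byte(n1)) is true precisely when some byte
// of `chunk` equals `n1`, which is how a whole word is tested for a match at
// once.)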
#[inline(always)] fn repeat_byte(b: u8) -> usize { (b as usize) * (usize::MAX / 255) } pub fn memchr(n1: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let confirm = |byte| byte == n1; let loop_size = cmp::min(LOOP_SIZE, haystack.len()); let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); let mut ptr = start_ptr; unsafe { let end_ptr = start_ptr.add(haystack.len()); if haystack.len() < USIZE_BYTES { return forward_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr as *const usize).read_unaligned(); if contains_zero_byte(chunk ^ vn1) { return forward_search(start_ptr, end_ptr, ptr, confirm); } ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); debug_assert!(ptr > start_ptr); debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let a = *(ptr as *const usize); let b = *(ptr.add(USIZE_BYTES) as *const usize); let eqa = contains_zero_byte(a ^ vn1); let eqb = contains_zero_byte(b ^ vn1); if eqa || eqb { break; } ptr = ptr.add(LOOP_SIZE); } forward_search(start_ptr, end_ptr, ptr, confirm) } } /// Like `memchr`, but searches for two bytes instead of one. pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let vn2 = repeat_byte(n2); let confirm = |byte| byte == n1 || byte == n2; let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); let mut ptr = start_ptr; unsafe { let end_ptr = start_ptr.add(haystack.len()); if haystack.len() < USIZE_BYTES { return forward_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr as *const usize).read_unaligned(); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); if eq1 || eq2 { return forward_search(start_ptr, end_ptr, ptr, confirm); } ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); debug_assert!(ptr > start_ptr); debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); while ptr <= end_ptr.sub(USIZE_BYTES) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let chunk = *(ptr as *const usize); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); if eq1 || eq2 { break; } ptr = ptr.add(USIZE_BYTES); } forward_search(start_ptr, end_ptr, ptr, confirm) } } /// Like `memchr`, but searches for three bytes instead of one. 
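// (Like memchr2 above, this steps one usize word per iteration rather than an
// unrolled LOOP_SIZE chunk, and checks one more repeated-byte mask per word.)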
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let vn2 = repeat_byte(n2); let vn3 = repeat_byte(n3); let confirm = |byte| byte == n1 || byte == n2 || byte == n3; let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); let mut ptr = start_ptr; unsafe { let end_ptr = start_ptr.add(haystack.len()); if haystack.len() < USIZE_BYTES { return forward_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr as *const usize).read_unaligned(); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); let eq3 = contains_zero_byte(chunk ^ vn3); if eq1 || eq2 || eq3 { return forward_search(start_ptr, end_ptr, ptr, confirm); } ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); debug_assert!(ptr > start_ptr); debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); while ptr <= end_ptr.sub(USIZE_BYTES) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let chunk = *(ptr as *const usize); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); let eq3 = contains_zero_byte(chunk ^ vn3); if eq1 || eq2 || eq3 { break; } ptr = ptr.add(USIZE_BYTES); } forward_search(start_ptr, end_ptr, ptr, confirm) } } /// Return the last index matching the byte `x` in `text`. pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let confirm = |byte| byte == n1; let loop_size = cmp::min(LOOP_SIZE, haystack.len()); let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); unsafe { let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < USIZE_BYTES { return reverse_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); if contains_zero_byte(chunk ^ vn1) { return reverse_search(start_ptr, end_ptr, ptr, confirm); } ptr = (end_ptr as usize & !align) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize); let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize); let eqa = contains_zero_byte(a ^ vn1); let eqb = contains_zero_byte(b ^ vn1); if eqa || eqb { break; } ptr = ptr.sub(loop_size); } reverse_search(start_ptr, end_ptr, ptr, confirm) } } /// Like `memrchr`, but searches for two bytes instead of one. pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let vn2 = repeat_byte(n2); let confirm = |byte| byte == n1 || byte == n2; let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); unsafe { let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < USIZE_BYTES { return reverse_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); if eq1 || eq2 { return reverse_search(start_ptr, end_ptr, ptr, confirm); } ptr = (end_ptr as usize & !align) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while ptr >= start_ptr.add(USIZE_BYTES) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let chunk = *(ptr.sub(USIZE_BYTES) as *const usize); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); if eq1 || eq2 { break; } ptr = ptr.sub(USIZE_BYTES); } reverse_search(start_ptr, end_ptr, ptr, confirm) } } /// Like `memrchr`, but searches for three bytes instead of one. 
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { let vn1 = repeat_byte(n1); let vn2 = repeat_byte(n2); let vn3 = repeat_byte(n3); let confirm = |byte| byte == n1 || byte == n2 || byte == n3; let align = USIZE_BYTES - 1; let start_ptr = haystack.as_ptr(); unsafe { let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < USIZE_BYTES { return reverse_search(start_ptr, end_ptr, ptr, confirm); } let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); let eq3 = contains_zero_byte(chunk ^ vn3); if eq1 || eq2 || eq3 { return reverse_search(start_ptr, end_ptr, ptr, confirm); } ptr = (end_ptr as usize & !align) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while ptr >= start_ptr.add(USIZE_BYTES) { debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); let chunk = *(ptr.sub(USIZE_BYTES) as *const usize); let eq1 = contains_zero_byte(chunk ^ vn1); let eq2 = contains_zero_byte(chunk ^ vn2); let eq3 = contains_zero_byte(chunk ^ vn3); if eq1 || eq2 || eq3 { break; } ptr = ptr.sub(USIZE_BYTES); } reverse_search(start_ptr, end_ptr, ptr, confirm) } } #[inline(always)] unsafe fn forward_search bool>( start_ptr: *const u8, end_ptr: *const u8, mut ptr: *const u8, confirm: F, ) -> Option { debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr); while ptr < end_ptr { if confirm(*ptr) { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } None } #[inline(always)] unsafe fn reverse_search bool>( start_ptr: *const u8, end_ptr: *const u8, mut ptr: *const u8, confirm: F, ) -> Option { debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr); while ptr > start_ptr { ptr = ptr.offset(-1); if confirm(*ptr) { return Some(sub(ptr, start_ptr)); } } None } /// Subtract `b` from `a` and return the difference. `a` should be greater than /// or equal to `b`. fn sub(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memchr/iter.rs000064400000000000000000000103370072674642500145120ustar 00000000000000use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; macro_rules! iter_next { // Common code for the memchr iterators: // update haystack and position and produce the index // // self: &mut Self where Self is the iterator // search_result: Option which is the result of the corresponding // memchr function. // // Returns Option (the next iterator element) ($self_:expr, $search_result:expr) => { $search_result.map(move |index| { // split and take the remaining back half $self_.haystack = $self_.haystack.split_at(index + 1).1; let found_position = $self_.position + index; $self_.position = found_position + 1; found_position }) }; } macro_rules! iter_next_back { ($self_:expr, $search_result:expr) => { $search_result.map(move |index| { // split and take the remaining front half $self_.haystack = $self_.haystack.split_at(index).0; $self_.position + index }) }; } /// An iterator for `memchr`. pub struct Memchr<'a> { needle: u8, // The haystack to iterate over haystack: &'a [u8], // The index position: usize, } impl<'a> Memchr<'a> { /// Creates a new iterator that yields all positions of needle in haystack. 
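    ///
    /// For example, iterating over the positions of `a` in `banana`:
    ///
    /// ```
    /// use memchr::Memchr;
    ///
    /// let mut it = Memchr::new(b'a', b"banana");
    /// assert_eq!(Some(1), it.next());
    /// assert_eq!(Some(3), it.next());
    /// assert_eq!(Some(5), it.next());
    /// assert_eq!(None, it.next());
    /// ```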
#[inline] pub fn new(needle: u8, haystack: &[u8]) -> Memchr<'_> { Memchr { needle: needle, haystack: haystack, position: 0 } } } impl<'a> Iterator for Memchr<'a> { type Item = usize; #[inline] fn next(&mut self) -> Option { iter_next!(self, memchr(self.needle, self.haystack)) } #[inline] fn size_hint(&self) -> (usize, Option) { (0, Some(self.haystack.len())) } } impl<'a> DoubleEndedIterator for Memchr<'a> { #[inline] fn next_back(&mut self) -> Option { iter_next_back!(self, memrchr(self.needle, self.haystack)) } } /// An iterator for `memchr2`. pub struct Memchr2<'a> { needle1: u8, needle2: u8, // The haystack to iterate over haystack: &'a [u8], // The index position: usize, } impl<'a> Memchr2<'a> { /// Creates a new iterator that yields all positions of needle in haystack. #[inline] pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> { Memchr2 { needle1: needle1, needle2: needle2, haystack: haystack, position: 0, } } } impl<'a> Iterator for Memchr2<'a> { type Item = usize; #[inline] fn next(&mut self) -> Option { iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack)) } #[inline] fn size_hint(&self) -> (usize, Option) { (0, Some(self.haystack.len())) } } impl<'a> DoubleEndedIterator for Memchr2<'a> { #[inline] fn next_back(&mut self) -> Option { iter_next_back!( self, memrchr2(self.needle1, self.needle2, self.haystack) ) } } /// An iterator for `memchr3`. pub struct Memchr3<'a> { needle1: u8, needle2: u8, needle3: u8, // The haystack to iterate over haystack: &'a [u8], // The index position: usize, } impl<'a> Memchr3<'a> { /// Create a new `Memchr3` that's initialized to zero with a haystack #[inline] pub fn new( needle1: u8, needle2: u8, needle3: u8, haystack: &[u8], ) -> Memchr3<'_> { Memchr3 { needle1: needle1, needle2: needle2, needle3: needle3, haystack: haystack, position: 0, } } } impl<'a> Iterator for Memchr3<'a> { type Item = usize; #[inline] fn next(&mut self) -> Option { iter_next!( self, memchr3(self.needle1, self.needle2, self.needle3, self.haystack) ) } #[inline] fn size_hint(&self) -> (usize, Option) { (0, Some(self.haystack.len())) } } impl<'a> DoubleEndedIterator for Memchr3<'a> { #[inline] fn next_back(&mut self) -> Option { iter_next_back!( self, memrchr3(self.needle1, self.needle2, self.needle3, self.haystack) ) } } memchr-2.4.1/src/memchr/mod.rs000064400000000000000000000273440072674642500143340ustar 00000000000000use core::iter::Rev; pub use self::iter::{Memchr, Memchr2, Memchr3}; // N.B. If you're looking for the cfg knobs for libc, see build.rs. #[cfg(memchr_libc)] mod c; #[allow(dead_code)] pub mod fallback; mod iter; pub mod naive; #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] mod x86; /// An iterator over all occurrences of the needle in a haystack. #[inline] pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr<'_> { Memchr::new(needle, haystack) } /// An iterator over all occurrences of the needles in a haystack. #[inline] pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> { Memchr2::new(needle1, needle2, haystack) } /// An iterator over all occurrences of the needles in a haystack. #[inline] pub fn memchr3_iter( needle1: u8, needle2: u8, needle3: u8, haystack: &[u8], ) -> Memchr3<'_> { Memchr3::new(needle1, needle2, needle3, haystack) } /// An iterator over all occurrences of the needle in a haystack, in reverse. 
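///
/// For example, visiting the positions of `z` from the back of the haystack:
///
/// ```
/// use memchr::memrchr_iter;
///
/// let haystack = b"xyzaxyzbxyzc";
/// let mut it = memrchr_iter(b'z', haystack);
/// assert_eq!(Some(10), it.next());
/// assert_eq!(Some(6), it.next());
/// assert_eq!(Some(2), it.next());
/// assert_eq!(None, it.next());
/// ```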
#[inline] pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev> { Memchr::new(needle, haystack).rev() } /// An iterator over all occurrences of the needles in a haystack, in reverse. #[inline] pub fn memrchr2_iter( needle1: u8, needle2: u8, haystack: &[u8], ) -> Rev> { Memchr2::new(needle1, needle2, haystack).rev() } /// An iterator over all occurrences of the needles in a haystack, in reverse. #[inline] pub fn memrchr3_iter( needle1: u8, needle2: u8, needle3: u8, haystack: &[u8], ) -> Rev> { Memchr3::new(needle1, needle2, needle3, haystack).rev() } /// Search for the first occurrence of a byte in a slice. /// /// This returns the index corresponding to the first occurrence of `needle` in /// `haystack`, or `None` if one is not found. If an index is returned, it is /// guaranteed to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly /// optimized routine that can be up to an order of magnitude faster in some /// cases. /// /// # Example /// /// This shows how to find the first position of a byte in a byte string. /// /// ``` /// use memchr::memchr; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memchr(b'k', haystack), Some(8)); /// ``` #[inline] pub fn memchr(needle: u8, haystack: &[u8]) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { naive::memchr(n1, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { x86::memchr(n1, haystack) } #[cfg(all( memchr_libc, not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { c::memchr(n1, haystack) } #[cfg(all( not(memchr_libc), not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { fallback::memchr(n1, haystack) } if haystack.is_empty() { None } else { imp(needle, haystack) } } /// Like `memchr`, but searches for either of two bytes instead of just one. /// /// This returns the index corresponding to the first occurrence of `needle1` /// or the first occurrence of `needle2` in `haystack` (whichever occurs /// earlier), or `None` if neither one is found. If an index is returned, it is /// guaranteed to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2` /// will use a highly optimized routine that can be up to an order of magnitude /// faster in some cases. /// /// # Example /// /// This shows how to find the first position of either of two bytes in a byte /// string. 
/// /// ``` /// use memchr::memchr2; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); /// ``` #[inline] pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { naive::memchr2(n1, n2, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { x86::memchr2(n1, n2, haystack) } #[cfg(all( not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { fallback::memchr2(n1, n2, haystack) } if haystack.is_empty() { None } else { imp(needle1, needle2, haystack) } } /// Like `memchr`, but searches for any of three bytes instead of just one. /// /// This returns the index corresponding to the first occurrence of `needle1`, /// the first occurrence of `needle2`, or the first occurrence of `needle3` in /// `haystack` (whichever occurs earliest), or `None` if none are found. If an /// index is returned, it is guaranteed to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().position(|&b| b == needle1 || b == needle2 || /// b == needle3)`, `memchr3` will use a highly optimized routine that can be /// up to an order of magnitude faster in some cases. /// /// # Example /// /// This shows how to find the first position of any of three bytes in a byte /// string. /// /// ``` /// use memchr::memchr3; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2)); /// ``` #[inline] pub fn memchr3( needle1: u8, needle2: u8, needle3: u8, haystack: &[u8], ) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { naive::memchr3(n1, n2, n3, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { x86::memchr3(n1, n2, n3, haystack) } #[cfg(all( not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { fallback::memchr3(n1, n2, n3, haystack) } if haystack.is_empty() { None } else { imp(needle1, needle2, needle3, haystack) } } /// Search for the last occurrence of a byte in a slice. /// /// This returns the index corresponding to the last occurrence of `needle` in /// `haystack`, or `None` if one is not found. If an index is returned, it is /// guaranteed to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly /// optimized routine that can be up to an order of magnitude faster in some /// cases. /// /// # Example /// /// This shows how to find the last position of a byte in a byte string. 
/// /// ``` /// use memchr::memrchr; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memrchr(b'o', haystack), Some(17)); /// ``` #[inline] pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { naive::memrchr(n1, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { x86::memrchr(n1, haystack) } #[cfg(all( memchr_libc, target_os = "linux", not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri) ))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { c::memrchr(n1, haystack) } #[cfg(all( not(all(memchr_libc, target_os = "linux")), not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, haystack: &[u8]) -> Option { fallback::memrchr(n1, haystack) } if haystack.is_empty() { None } else { imp(needle, haystack) } } /// Like `memrchr`, but searches for either of two bytes instead of just one. /// /// This returns the index corresponding to the last occurrence of `needle1` or /// the last occurrence of `needle2` in `haystack` (whichever occurs later), or /// `None` if neither one is found. If an index is returned, it is guaranteed /// to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2` /// will use a highly optimized routine that can be up to an order of magnitude /// faster in some cases. /// /// # Example /// /// This shows how to find the last position of either of two bytes in a byte /// string. /// /// ``` /// use memchr::memrchr2; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8)); /// ``` #[inline] pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { naive::memrchr2(n1, n2, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { x86::memrchr2(n1, n2, haystack) } #[cfg(all( not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { fallback::memrchr2(n1, n2, haystack) } if haystack.is_empty() { None } else { imp(needle1, needle2, haystack) } } /// Like `memrchr`, but searches for any of three bytes instead of just one. /// /// This returns the index corresponding to the last occurrence of `needle1`, /// the last occurrence of `needle2`, or the last occurrence of `needle3` in /// `haystack` (whichever occurs later), or `None` if none are found. If an /// index is returned, it is guaranteed to be less than `usize::MAX`. /// /// While this is operationally the same as something like /// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || /// b == needle3)`, `memrchr3` will use a highly optimized routine that can be /// up to an order of magnitude faster in some cases. /// /// # Example /// /// This shows how to find the last position of any of three bytes in a byte /// string. 
/// /// ``` /// use memchr::memrchr3; /// /// let haystack = b"the quick brown fox"; /// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8)); /// ``` #[inline] pub fn memrchr3( needle1: u8, needle2: u8, needle3: u8, haystack: &[u8], ) -> Option { #[cfg(miri)] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { naive::memrchr3(n1, n2, n3, haystack) } #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { x86::memrchr3(n1, n2, n3, haystack) } #[cfg(all( not(all(target_arch = "x86_64", memchr_runtime_simd)), not(miri), ))] #[inline(always)] fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { fallback::memrchr3(n1, n2, n3, haystack) } if haystack.is_empty() { None } else { imp(needle1, needle2, needle3, haystack) } } memchr-2.4.1/src/memchr/naive.rs000064400000000000000000000014010072674642500146410ustar 00000000000000#![allow(dead_code)] pub fn memchr(n1: u8, haystack: &[u8]) -> Option { haystack.iter().position(|&b| b == n1) } pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { haystack.iter().position(|&b| b == n1 || b == n2) } pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) } pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { haystack.iter().rposition(|&b| b == n1) } pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { haystack.iter().rposition(|&b| b == n1 || b == n2) } pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) } memchr-2.4.1/src/memchr/x86/avx.rs000064400000000000000000000634060072674642500147770ustar 00000000000000use core::{arch::x86_64::*, cmp, mem::size_of}; use super::sse2; const VECTOR_SIZE: usize = size_of::<__m256i>(); const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; // The number of bytes to loop at in one iteration of memchr/memrchr. const LOOP_SIZE: usize = 4 * VECTOR_SIZE; // The number of bytes to loop at in one iteration of memchr2/memrchr2 and // memchr3/memrchr3. There was no observable difference between 128 and 64 // bytes in benchmarks. memchr3 in particular only gets a very slight speed up // from the loop unrolling. const LOOP_SIZE2: usize = 2 * VECTOR_SIZE; #[target_feature(enable = "avx2")] pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option { // For a high level explanation for how this algorithm works, see the // sse2 implementation. The avx implementation here is the same, but with // 256-bit vectors instead of 128-bit vectors. // This routine is called whenever a match is detected. It is specifically // marked as unlineable because it improves the codegen of the unrolled // loop below. Inlining this seems to cause codegen with some extra adds // and a load that aren't necessary. This seems to result in about a 10% // improvement for the memchr1/crate/huge/never benchmark. // // Interestingly, I couldn't observe a similar improvement for memrchr. 
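    // (For orientation: each _mm256_cmpeq_epi8 result is reduced to a 32-bit
    // mask by _mm256_movemask_epi8, one bit per byte of the 32-byte vector.
    // `matched` walks the four masks in order and uses forward_pos to turn the
    // lowest set bit of the first non-zero mask into a byte offset within that
    // chunk.)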
#[cold] #[inline(never)] #[target_feature(enable = "avx2")] unsafe fn matched( start_ptr: *const u8, ptr: *const u8, eqa: __m256i, eqb: __m256i, eqc: __m256i, eqd: __m256i, ) -> usize { let mut at = sub(ptr, start_ptr); let mask = _mm256_movemask_epi8(eqa); if mask != 0 { return at + forward_pos(mask); } at += VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqb); if mask != 0 { return at + forward_pos(mask); } at += VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqc); if mask != 0 { return at + forward_pos(mask); } at += VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqd); debug_assert!(mask != 0); at + forward_pos(mask) } let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { // For small haystacks, defer to the SSE2 implementation. Codegen // suggests this completely avoids touching the AVX vectors. return sse2::memchr(n1, haystack); } let vn1 = _mm256_set1_epi8(n1 as i8); let loop_size = cmp::min(LOOP_SIZE, haystack.len()); if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); let eqa = _mm256_cmpeq_epi8(vn1, a); let eqb = _mm256_cmpeq_epi8(vn1, b); let eqc = _mm256_cmpeq_epi8(vn1, c); let eqd = _mm256_cmpeq_epi8(vn1, d); let or1 = _mm256_or_si256(eqa, eqb); let or2 = _mm256_or_si256(eqc, eqd); let or3 = _mm256_or_si256(or1, or2); if _mm256_movemask_epi8(or3) != 0 { return Some(matched(start_ptr, ptr, eqa, eqb, eqc, eqd)); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search1(start_ptr, end_ptr, ptr, vn1); } None } #[target_feature(enable = "avx2")] pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { #[cold] #[inline(never)] #[target_feature(enable = "avx2")] unsafe fn matched( start_ptr: *const u8, ptr: *const u8, eqa1: __m256i, eqa2: __m256i, eqb1: __m256i, eqb2: __m256i, ) -> usize { let mut at = sub(ptr, start_ptr); let mask1 = _mm256_movemask_epi8(eqa1); let mask2 = _mm256_movemask_epi8(eqa2); if mask1 != 0 || mask2 != 0 { return at + forward_pos2(mask1, mask2); } at += VECTOR_SIZE; let mask1 = _mm256_movemask_epi8(eqb1); let mask2 = _mm256_movemask_epi8(eqb2); at + forward_pos2(mask1, mask2) } let vn1 = _mm256_set1_epi8(n1 as i8); let vn2 = _mm256_set1_epi8(n2 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 || *ptr == n2 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = 
ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let eqa1 = _mm256_cmpeq_epi8(vn1, a); let eqb1 = _mm256_cmpeq_epi8(vn1, b); let eqa2 = _mm256_cmpeq_epi8(vn2, a); let eqb2 = _mm256_cmpeq_epi8(vn2, b); let or1 = _mm256_or_si256(eqa1, eqb1); let or2 = _mm256_or_si256(eqa2, eqb2); let or3 = _mm256_or_si256(or1, or2); if _mm256_movemask_epi8(or3) != 0 { return Some(matched(start_ptr, ptr, eqa1, eqa2, eqb1, eqb2)); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); } None } #[target_feature(enable = "avx2")] pub unsafe fn memchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8], ) -> Option { #[cold] #[inline(never)] #[target_feature(enable = "avx2")] unsafe fn matched( start_ptr: *const u8, ptr: *const u8, eqa1: __m256i, eqa2: __m256i, eqa3: __m256i, eqb1: __m256i, eqb2: __m256i, eqb3: __m256i, ) -> usize { let mut at = sub(ptr, start_ptr); let mask1 = _mm256_movemask_epi8(eqa1); let mask2 = _mm256_movemask_epi8(eqa2); let mask3 = _mm256_movemask_epi8(eqa3); if mask1 != 0 || mask2 != 0 || mask3 != 0 { return at + forward_pos3(mask1, mask2, mask3); } at += VECTOR_SIZE; let mask1 = _mm256_movemask_epi8(eqb1); let mask2 = _mm256_movemask_epi8(eqb2); let mask3 = _mm256_movemask_epi8(eqb3); at + forward_pos3(mask1, mask2, mask3) } let vn1 = _mm256_set1_epi8(n1 as i8); let vn2 = _mm256_set1_epi8(n2 as i8); let vn3 = _mm256_set1_epi8(n3 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 || *ptr == n2 || *ptr == n3 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let eqa1 = _mm256_cmpeq_epi8(vn1, a); let eqb1 = _mm256_cmpeq_epi8(vn1, b); let eqa2 = _mm256_cmpeq_epi8(vn2, a); let eqb2 = _mm256_cmpeq_epi8(vn2, b); let eqa3 = _mm256_cmpeq_epi8(vn3, a); let eqb3 = _mm256_cmpeq_epi8(vn3, b); let or1 = _mm256_or_si256(eqa1, eqb1); let or2 = _mm256_or_si256(eqa2, eqb2); let or3 = _mm256_or_si256(eqa3, eqb3); let or4 = _mm256_or_si256(or1, or2); let or5 = _mm256_or_si256(or3, or4); if _mm256_movemask_epi8(or5) != 0 { return Some(matched( start_ptr, ptr, eqa1, eqa2, eqa3, eqb1, eqb2, eqb3, )); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = 
ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); } None } #[target_feature(enable = "avx2")] pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option { let vn1 = _mm256_set1_epi8(n1 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); let eqa = _mm256_cmpeq_epi8(vn1, a); let eqb = _mm256_cmpeq_epi8(vn1, b); let eqc = _mm256_cmpeq_epi8(vn1, c); let eqd = _mm256_cmpeq_epi8(vn1, d); let or1 = _mm256_or_si256(eqa, eqb); let or2 = _mm256_or_si256(eqc, eqd); let or3 = _mm256_or_si256(or1, or2); if _mm256_movemask_epi8(or3) != 0 { let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); let mask = _mm256_movemask_epi8(eqd); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqc); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqb); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm256_movemask_epi8(eqa); debug_assert!(mask != 0); return Some(at + reverse_pos(mask)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); } None } #[target_feature(enable = "avx2")] pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { let vn1 = _mm256_set1_epi8(n1 as i8); let vn2 = _mm256_set1_epi8(n2 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 || *ptr == n2 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let eqa1 = _mm256_cmpeq_epi8(vn1, a); let eqb1 = _mm256_cmpeq_epi8(vn1, b); let eqa2 = _mm256_cmpeq_epi8(vn2, a); let eqb2 = 
_mm256_cmpeq_epi8(vn2, b); let or1 = _mm256_or_si256(eqa1, eqb1); let or2 = _mm256_or_si256(eqa2, eqb2); let or3 = _mm256_or_si256(or1, or2); if _mm256_movemask_epi8(or3) != 0 { let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); let mask1 = _mm256_movemask_epi8(eqb1); let mask2 = _mm256_movemask_epi8(eqb2); if mask1 != 0 || mask2 != 0 { return Some(at + reverse_pos2(mask1, mask2)); } at -= VECTOR_SIZE; let mask1 = _mm256_movemask_epi8(eqa1); let mask2 = _mm256_movemask_epi8(eqa2); return Some(at + reverse_pos2(mask1, mask2)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); } None } #[target_feature(enable = "avx2")] pub unsafe fn memrchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8], ) -> Option { let vn1 = _mm256_set1_epi8(n1 as i8); let vn2 = _mm256_set1_epi8(n2 as i8); let vn3 = _mm256_set1_epi8(n3 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 || *ptr == n2 || *ptr == n3 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm256_load_si256(ptr as *const __m256i); let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); let eqa1 = _mm256_cmpeq_epi8(vn1, a); let eqb1 = _mm256_cmpeq_epi8(vn1, b); let eqa2 = _mm256_cmpeq_epi8(vn2, a); let eqb2 = _mm256_cmpeq_epi8(vn2, b); let eqa3 = _mm256_cmpeq_epi8(vn3, a); let eqb3 = _mm256_cmpeq_epi8(vn3, b); let or1 = _mm256_or_si256(eqa1, eqb1); let or2 = _mm256_or_si256(eqa2, eqb2); let or3 = _mm256_or_si256(eqa3, eqb3); let or4 = _mm256_or_si256(or1, or2); let or5 = _mm256_or_si256(or3, or4); if _mm256_movemask_epi8(or5) != 0 { let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); let mask1 = _mm256_movemask_epi8(eqb1); let mask2 = _mm256_movemask_epi8(eqb2); let mask3 = _mm256_movemask_epi8(eqb3); if mask1 != 0 || mask2 != 0 || mask3 != 0 { return Some(at + reverse_pos3(mask1, mask2, mask3)); } at -= VECTOR_SIZE; let mask1 = _mm256_movemask_epi8(eqa1); let mask2 = _mm256_movemask_epi8(eqa2); let mask3 = _mm256_movemask_epi8(eqa3); return Some(at + reverse_pos3(mask1, mask2, mask3)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); } None } #[target_feature(enable = "avx2")] unsafe fn forward_search1( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1)); if mask 
!= 0 { Some(sub(ptr, start_ptr) + forward_pos(mask)) } else { None } } #[target_feature(enable = "avx2")] unsafe fn forward_search2( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, vn2: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let eq1 = _mm256_cmpeq_epi8(chunk, vn1); let eq2 = _mm256_cmpeq_epi8(chunk, vn2); if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { let mask1 = _mm256_movemask_epi8(eq1); let mask2 = _mm256_movemask_epi8(eq2); Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) } else { None } } #[target_feature(enable = "avx2")] unsafe fn forward_search3( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, vn2: __m256i, vn3: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let eq1 = _mm256_cmpeq_epi8(chunk, vn1); let eq2 = _mm256_cmpeq_epi8(chunk, vn2); let eq3 = _mm256_cmpeq_epi8(chunk, vn3); let or = _mm256_or_si256(eq1, eq2); if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { let mask1 = _mm256_movemask_epi8(eq1); let mask2 = _mm256_movemask_epi8(eq2); let mask3 = _mm256_movemask_epi8(eq3); Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) } else { None } } #[target_feature(enable = "avx2")] unsafe fn reverse_search1( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk)); if mask != 0 { Some(sub(ptr, start_ptr) + reverse_pos(mask)) } else { None } } #[target_feature(enable = "avx2")] unsafe fn reverse_search2( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, vn2: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let eq1 = _mm256_cmpeq_epi8(chunk, vn1); let eq2 = _mm256_cmpeq_epi8(chunk, vn2); if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { let mask1 = _mm256_movemask_epi8(eq1); let mask2 = _mm256_movemask_epi8(eq2); Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2)) } else { None } } #[target_feature(enable = "avx2")] unsafe fn reverse_search3( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m256i, vn2: __m256i, vn3: __m256i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm256_loadu_si256(ptr as *const __m256i); let eq1 = _mm256_cmpeq_epi8(chunk, vn1); let eq2 = _mm256_cmpeq_epi8(chunk, vn2); let eq3 = _mm256_cmpeq_epi8(chunk, vn3); let or = _mm256_or_si256(eq1, eq2); if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { let mask1 = _mm256_movemask_epi8(eq1); let mask2 = _mm256_movemask_epi8(eq2); let mask3 = _mm256_movemask_epi8(eq3); Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) } else { None } } /// Compute the position of the first matching byte from the given mask. The /// position returned is always in the range [0, 31]. 
/// /// The mask given is expected to be the result of _mm256_movemask_epi8. fn forward_pos(mask: i32) -> usize { // We are dealing with little endian here, where the most significant byte // is at a higher address. That means the least significant bit that is set // corresponds to the position of our first matching byte. That position // corresponds to the number of zeros after the least significant bit. mask.trailing_zeros() as usize } /// Compute the position of the first matching byte from the given masks. The /// position returned is always in the range [0, 31]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm256_movemask_epi8, /// where at least one of the masks is non-zero (i.e., indicates a match). fn forward_pos2(mask1: i32, mask2: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0); forward_pos(mask1 | mask2) } /// Compute the position of the first matching byte from the given masks. The /// position returned is always in the range [0, 31]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm256_movemask_epi8, /// where at least one of the masks is non-zero (i.e., indicates a match). fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); forward_pos(mask1 | mask2 | mask3) } /// Compute the position of the last matching byte from the given mask. The /// position returned is always in the range [0, 31]. /// /// The mask given is expected to be the result of _mm256_movemask_epi8. fn reverse_pos(mask: i32) -> usize { // We are dealing with little endian here, where the most significant byte // is at a higher address. That means the most significant bit that is set // corresponds to the position of our last matching byte. The position from // the end of the mask is therefore the number of leading zeros in a 32 // bit integer, and the position from the start of the mask is therefore // 32 - (leading zeros) - 1. VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1 } /// Compute the position of the last matching byte from the given masks. The /// position returned is always in the range [0, 31]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm256_movemask_epi8, /// where at least one of the masks is non-zero (i.e., indicates a match). fn reverse_pos2(mask1: i32, mask2: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0); reverse_pos(mask1 | mask2) } /// Compute the position of the last matching byte from the given masks. The /// position returned is always in the range [0, 31]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm256_movemask_epi8, /// where at least one of the masks is non-zero (i.e., indicates a match). fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); reverse_pos(mask1 | mask2 | mask3) } /// Subtract `b` from `a` and return the difference. `a` should be greater than /// or equal to `b`. fn sub(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memchr/x86/mod.rs000064400000000000000000000121540072674642500147520ustar 00000000000000use super::fallback; // We only use AVX when we can detect at runtime whether it's available, which // requires std. 
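// Illustrative sketch (added commentary, not upstream code): for `memchr`,
// the dispatch performed by the `unsafe_ifunc!` macro defined below is
// conceptually equivalent to
//
//     if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
//         unsafe { avx::memchr(n1, haystack) }
//     } else if cfg!(memchr_runtime_sse2) {
//         unsafe { sse2::memchr(n1, haystack) }
//     } else {
//         fallback::memchr(n1, haystack)
//     }
//
// except that the result of feature detection is cached in an `AtomicPtr`,
// so the check and its branch only happen on the first call.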
#[cfg(feature = "std")] mod avx; mod sse2; /// This macro employs a gcc-like "ifunc" trick where by upon first calling /// `memchr` (for example), CPU feature detection will be performed at runtime /// to determine the best implementation to use. After CPU feature detection /// is done, we replace `memchr`'s function pointer with the selection. Upon /// subsequent invocations, the CPU-specific routine is invoked directly, which /// skips the CPU feature detection and subsequent branch that's required. /// /// While this typically doesn't matter for rare occurrences or when used on /// larger haystacks, `memchr` can be called in tight loops where the overhead /// of this branch can actually add up *and is measurable*. This trick was /// necessary to bring this implementation up to glibc's speeds for the 'tiny' /// benchmarks, for example. /// /// At some point, I expect the Rust ecosystem will get a nice macro for doing /// exactly this, at which point, we can replace our hand-jammed version of it. /// /// N.B. The ifunc strategy does prevent function inlining of course, but /// on modern CPUs, you'll probably end up with the AVX2 implementation, /// which probably can't be inlined anyway---unless you've compiled your /// entire program with AVX2 enabled. However, even then, the various memchr /// implementations aren't exactly small, so inlining might not help anyway! /// /// # Safety /// /// Callers must ensure that fnty is function pointer type. #[cfg(feature = "std")] macro_rules! unsafe_ifunc { ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{ use std::{mem, sync::atomic::{AtomicPtr, Ordering}}; type FnRaw = *mut (); static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw); fn detect($($needle: u8),+, haystack: &[u8]) -> Option { let fun = if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") { avx::$name as FnRaw } else if cfg!(memchr_runtime_sse2) { sse2::$name as FnRaw } else { fallback::$name as FnRaw }; FN.store(fun as FnRaw, Ordering::Relaxed); // SAFETY: By virtue of the caller contract, $fnty is a function // pointer, which is always safe to transmute with a *mut (). // Also, if 'fun is the AVX routine, then it is guaranteed to be // supported since we checked the avx2 feature. unsafe { mem::transmute::(fun)($($needle),+, haystack) } } // SAFETY: By virtue of the caller contract, $fnty is a function // pointer, which is always safe to transmute with a *mut (). Also, if // 'fun is the AVX routine, then it is guaranteed to be supported since // we checked the avx2 feature. unsafe { let fun = FN.load(Ordering::Relaxed); mem::transmute::(fun)($($needle),+, $haystack) } }} } /// When std isn't available to provide runtime CPU feature detection, or if /// runtime CPU feature detection has been explicitly disabled, then just /// call our optimized SSE2 routine directly. SSE2 is avalbale on all x86_64 /// targets, so no CPU feature detection is necessary. /// /// # Safety /// /// There are no safety requirements for this definition of the macro. It is /// safe for all inputs since it is restricted to either the fallback routine /// or the SSE routine, which is always safe to call on x86_64. #[cfg(not(feature = "std"))] macro_rules! 
unsafe_ifunc { ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{ if cfg!(memchr_runtime_sse2) { unsafe { sse2::$name($($needle),+, $haystack) } } else { fallback::$name($($needle),+, $haystack) } }} } #[inline(always)] pub fn memchr(n1: u8, haystack: &[u8]) -> Option { unsafe_ifunc!(fn(u8, &[u8]) -> Option, memchr, haystack, n1) } #[inline(always)] pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { unsafe_ifunc!( fn(u8, u8, &[u8]) -> Option, memchr2, haystack, n1, n2 ) } #[inline(always)] pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { unsafe_ifunc!( fn(u8, u8, u8, &[u8]) -> Option, memchr3, haystack, n1, n2, n3 ) } #[inline(always)] pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { unsafe_ifunc!(fn(u8, &[u8]) -> Option, memrchr, haystack, n1) } #[inline(always)] pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { unsafe_ifunc!( fn(u8, u8, &[u8]) -> Option, memrchr2, haystack, n1, n2 ) } #[inline(always)] pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { unsafe_ifunc!( fn(u8, u8, u8, &[u8]) -> Option, memrchr3, haystack, n1, n2, n3 ) } memchr-2.4.1/src/memchr/x86/sse2.rs000064400000000000000000000716020072674642500150520ustar 00000000000000use core::{arch::x86_64::*, cmp, mem::size_of}; const VECTOR_SIZE: usize = size_of::<__m128i>(); const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; // The number of bytes to loop at in one iteration of memchr/memrchr. const LOOP_SIZE: usize = 4 * VECTOR_SIZE; // The number of bytes to loop at in one iteration of memchr2/memrchr2 and // memchr3/memrchr3. There was no observable difference between 64 and 32 bytes // in benchmarks. memchr3 in particular only gets a very slight speed up from // the loop unrolling. const LOOP_SIZE2: usize = 2 * VECTOR_SIZE; #[target_feature(enable = "sse2")] pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option { // What follows is a fast SSE2-only algorithm to detect the position of // `n1` in `haystack` if it exists. From what I know, this is the "classic" // algorithm. I believe it can be found in places like glibc and Go's // standard library. It appears to be well known and is elaborated on in // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html // // While this routine is very long, the basic idea is actually very simple // and can be expressed straight-forwardly in pseudo code: // // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1 // // Note: shift amount in bytes // // while i <= haystack.len() - 16: // // A 16 byte vector. Each byte in chunk corresponds to a byte in // // the haystack. // chunk = haystack[i:i+16] // // Compare bytes in needle with bytes in chunk. The result is a 16 // // byte chunk where each byte is 0xFF if the corresponding bytes // // in needle and chunk were equal, or 0x00 otherwise. // eqs = cmpeq(needle, chunk) // // Return a 32 bit integer where the most significant 16 bits // // are always 0 and the lower 16 bits correspond to whether the // // most significant bit in the correspond byte in `eqs` is set. // // In other words, `mask as u16` has bit i set if and only if // // needle[i] == chunk[i]. // mask = movemask(eqs) // // // Mask is 0 if there is no match, and non-zero otherwise. // if mask != 0: // // trailing_zeros tells us the position of the least significant // // bit that is set. // return i + trailing_zeros(mask) // // // haystack length may not be a multiple of 16, so search the rest. // while i < haystack.len(): // if haystack[i] == n1: // return i // // // No match found. 
// return NULL // // In fact, we could loosely translate the above code to Rust line-for-line // and it would be a pretty fast algorithm. But, we pull out all the stops // to go as fast as possible: // // 1. We use aligned loads. That is, we do some finagling to make sure our // primary loop not only proceeds in increments of 16 bytes, but that // the address of haystack's pointer that we dereference is aligned to // 16 bytes. 16 is a magic number here because it is the size of SSE2 // 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.) // Therefore, to get aligned loads, our pointer's address must be evenly // divisible by 16. // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's // kind of like loop unrolling, but we combine the equality comparisons // using a vector OR such that we only need to extract a single mask to // determine whether a match exists or not. If so, then we do some // book-keeping to determine the precise location but otherwise mush on. // 3. We use our "chunk" comparison routine in as many places as possible, // even if it means using unaligned loads. In particular, if haystack // starts with an unaligned address, then we do an unaligned load to // search the first 16 bytes. We then start our primary loop at the // smallest subsequent aligned address, which will actually overlap with // previously searched bytes. But we're OK with that. We do a similar // dance at the end of our primary loop. Finally, to avoid a // byte-at-a-time loop at the end, we do a final 16 byte unaligned load // that may overlap with a previous load. This is OK because it converts // a loop into a small number of very fast vector instructions. // // The primary downside of this algorithm is that it's effectively // completely unsafe. Therefore, we have to be super careful to avoid // undefined behavior: // // 1. We use raw pointers everywhere. Not only does dereferencing a pointer // require the pointer to be valid, but we actually can't even store the // address of an invalid pointer (unless it's 1 past the end of // haystack) without sacrificing performance. // 2. _mm_loadu_si128 is used when you don't care about alignment, and // _mm_load_si128 is used when you do care. You cannot use the latter // on unaligned pointers. // 3. We make liberal use of debug_assert! to check assumptions. // 4. We make a concerted effort to stick with pointers instead of indices. // Indices are nicer because there's less to worry about with them (see // above about pointer offsets), but I could not get the compiler to // produce as good of code as what the below produces. In any case, // pointers are what we really care about here, and alignment is // expressed a bit more naturally with them. // // In general, most of the algorithms in this crate have a similar // structure to what you see below, so this comment applies fairly well to // all of them. 
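// A concrete example of the mask arithmetic sketched above (illustrative
// values only, not from the original comment): if n1 = b'z' and the 16 byte
// chunk loaded at offset i is b"abczabcdabcdabcd", then cmpeq sets only
// lane 3 to 0xFF, movemask returns 0b1000 (i.e. 8), trailing_zeros(8) is 3,
// and so the match is reported at position i + 3.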
let vn1 = _mm_set1_epi8(n1 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); let eqa = _mm_cmpeq_epi8(vn1, a); let eqb = _mm_cmpeq_epi8(vn1, b); let eqc = _mm_cmpeq_epi8(vn1, c); let eqd = _mm_cmpeq_epi8(vn1, d); let or1 = _mm_or_si128(eqa, eqb); let or2 = _mm_or_si128(eqc, eqd); let or3 = _mm_or_si128(or1, or2); if _mm_movemask_epi8(or3) != 0 { let mut at = sub(ptr, start_ptr); let mask = _mm_movemask_epi8(eqa); if mask != 0 { return Some(at + forward_pos(mask)); } at += VECTOR_SIZE; let mask = _mm_movemask_epi8(eqb); if mask != 0 { return Some(at + forward_pos(mask)); } at += VECTOR_SIZE; let mask = _mm_movemask_epi8(eqc); if mask != 0 { return Some(at + forward_pos(mask)); } at += VECTOR_SIZE; let mask = _mm_movemask_epi8(eqd); debug_assert!(mask != 0); return Some(at + forward_pos(mask)); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search1(start_ptr, end_ptr, ptr, vn1); } None } #[target_feature(enable = "sse2")] pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let vn2 = _mm_set1_epi8(n2 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 || *ptr == n2 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let eqa1 = _mm_cmpeq_epi8(vn1, a); let eqb1 = _mm_cmpeq_epi8(vn1, b); let eqa2 = _mm_cmpeq_epi8(vn2, a); let eqb2 = _mm_cmpeq_epi8(vn2, b); let or1 = _mm_or_si128(eqa1, eqb1); let or2 = _mm_or_si128(eqa2, eqb2); let or3 = _mm_or_si128(or1, or2); if _mm_movemask_epi8(or3) != 0 { let mut at = sub(ptr, start_ptr); let mask1 = _mm_movemask_epi8(eqa1); let mask2 = _mm_movemask_epi8(eqa2); if mask1 != 0 || mask2 != 0 { return Some(at + forward_pos2(mask1, mask2)); } at 
+= VECTOR_SIZE; let mask1 = _mm_movemask_epi8(eqb1); let mask2 = _mm_movemask_epi8(eqb2); return Some(at + forward_pos2(mask1, mask2)); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); } None } #[target_feature(enable = "sse2")] pub unsafe fn memchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8], ) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let vn2 = _mm_set1_epi8(n2 as i8); let vn3 = _mm_set1_epi8(n3 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 || *ptr == n2 || *ptr == n3 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let eqa1 = _mm_cmpeq_epi8(vn1, a); let eqb1 = _mm_cmpeq_epi8(vn1, b); let eqa2 = _mm_cmpeq_epi8(vn2, a); let eqb2 = _mm_cmpeq_epi8(vn2, b); let eqa3 = _mm_cmpeq_epi8(vn3, a); let eqb3 = _mm_cmpeq_epi8(vn3, b); let or1 = _mm_or_si128(eqa1, eqb1); let or2 = _mm_or_si128(eqa2, eqb2); let or3 = _mm_or_si128(eqa3, eqb3); let or4 = _mm_or_si128(or1, or2); let or5 = _mm_or_si128(or3, or4); if _mm_movemask_epi8(or5) != 0 { let mut at = sub(ptr, start_ptr); let mask1 = _mm_movemask_epi8(eqa1); let mask2 = _mm_movemask_epi8(eqa2); let mask3 = _mm_movemask_epi8(eqa3); if mask1 != 0 || mask2 != 0 || mask3 != 0 { return Some(at + forward_pos3(mask1, mask2, mask3)); } at += VECTOR_SIZE; let mask1 = _mm_movemask_epi8(eqb1); let mask2 = _mm_movemask_epi8(eqb2); let mask3 = _mm_movemask_epi8(eqb3); return Some(at + forward_pos3(mask1, mask2, mask3)); } ptr = ptr.add(loop_size); } while ptr <= end_ptr.sub(VECTOR_SIZE) { if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); } None } #[target_feature(enable = "sse2")] pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE 
&& ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); let eqa = _mm_cmpeq_epi8(vn1, a); let eqb = _mm_cmpeq_epi8(vn1, b); let eqc = _mm_cmpeq_epi8(vn1, c); let eqd = _mm_cmpeq_epi8(vn1, d); let or1 = _mm_or_si128(eqa, eqb); let or2 = _mm_or_si128(eqc, eqd); let or3 = _mm_or_si128(or1, or2); if _mm_movemask_epi8(or3) != 0 { let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); let mask = _mm_movemask_epi8(eqd); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm_movemask_epi8(eqc); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm_movemask_epi8(eqb); if mask != 0 { return Some(at + reverse_pos(mask)); } at -= VECTOR_SIZE; let mask = _mm_movemask_epi8(eqa); debug_assert!(mask != 0); return Some(at + reverse_pos(mask)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); } None } #[target_feature(enable = "sse2")] pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let vn2 = _mm_set1_epi8(n2 as i8); let len = haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 || *ptr == n2 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let eqa1 = _mm_cmpeq_epi8(vn1, a); let eqb1 = _mm_cmpeq_epi8(vn1, b); let eqa2 = _mm_cmpeq_epi8(vn2, a); let eqb2 = _mm_cmpeq_epi8(vn2, b); let or1 = _mm_or_si128(eqa1, eqb1); let or2 = _mm_or_si128(eqa2, eqb2); let or3 = _mm_or_si128(or1, or2); if _mm_movemask_epi8(or3) != 0 { let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); let mask1 = _mm_movemask_epi8(eqb1); let mask2 = _mm_movemask_epi8(eqb2); if mask1 != 0 || mask2 != 0 { return Some(at + reverse_pos2(mask1, mask2)); } at -= VECTOR_SIZE; let mask1 = _mm_movemask_epi8(eqa1); let mask2 = _mm_movemask_epi8(eqa2); return Some(at + reverse_pos2(mask1, mask2)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); } None } #[target_feature(enable = "sse2")] pub unsafe fn memrchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8], ) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let vn2 = _mm_set1_epi8(n2 as i8); let vn3 = _mm_set1_epi8(n3 as i8); let len = 
haystack.len(); let loop_size = cmp::min(LOOP_SIZE2, len); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let mut ptr = end_ptr; if haystack.len() < VECTOR_SIZE { while ptr > start_ptr { ptr = ptr.offset(-1); if *ptr == n1 || *ptr == n2 || *ptr == n3 { return Some(sub(ptr, start_ptr)); } } return None; } ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; debug_assert!(start_ptr <= ptr && ptr <= end_ptr); while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); ptr = ptr.sub(loop_size); let a = _mm_load_si128(ptr as *const __m128i); let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); let eqa1 = _mm_cmpeq_epi8(vn1, a); let eqb1 = _mm_cmpeq_epi8(vn1, b); let eqa2 = _mm_cmpeq_epi8(vn2, a); let eqb2 = _mm_cmpeq_epi8(vn2, b); let eqa3 = _mm_cmpeq_epi8(vn3, a); let eqb3 = _mm_cmpeq_epi8(vn3, b); let or1 = _mm_or_si128(eqa1, eqb1); let or2 = _mm_or_si128(eqa2, eqb2); let or3 = _mm_or_si128(eqa3, eqb3); let or4 = _mm_or_si128(or1, or2); let or5 = _mm_or_si128(or3, or4); if _mm_movemask_epi8(or5) != 0 { let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); let mask1 = _mm_movemask_epi8(eqb1); let mask2 = _mm_movemask_epi8(eqb2); let mask3 = _mm_movemask_epi8(eqb3); if mask1 != 0 || mask2 != 0 || mask3 != 0 { return Some(at + reverse_pos3(mask1, mask2, mask3)); } at -= VECTOR_SIZE; let mask1 = _mm_movemask_epi8(eqa1); let mask2 = _mm_movemask_epi8(eqa2); let mask3 = _mm_movemask_epi8(eqa3); return Some(at + reverse_pos3(mask1, mask2, mask3)); } } while ptr >= start_ptr.add(VECTOR_SIZE) { ptr = ptr.sub(VECTOR_SIZE); if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { return Some(i); } } if ptr > start_ptr { debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); } None } #[target_feature(enable = "sse2")] pub unsafe fn forward_search1( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vn1)); if mask != 0 { Some(sub(ptr, start_ptr) + forward_pos(mask)) } else { None } } #[target_feature(enable = "sse2")] unsafe fn forward_search2( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, vn2: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let eq1 = _mm_cmpeq_epi8(chunk, vn1); let eq2 = _mm_cmpeq_epi8(chunk, vn2); if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { let mask1 = _mm_movemask_epi8(eq1); let mask2 = _mm_movemask_epi8(eq2); Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) } else { None } } #[target_feature(enable = "sse2")] pub unsafe fn forward_search3( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, vn2: __m128i, vn3: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let eq1 = _mm_cmpeq_epi8(chunk, vn1); let eq2 = _mm_cmpeq_epi8(chunk, vn2); let eq3 
= _mm_cmpeq_epi8(chunk, vn3); let or = _mm_or_si128(eq1, eq2); if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { let mask1 = _mm_movemask_epi8(eq1); let mask2 = _mm_movemask_epi8(eq2); let mask3 = _mm_movemask_epi8(eq3); Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) } else { None } } #[target_feature(enable = "sse2")] unsafe fn reverse_search1( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk)); if mask != 0 { Some(sub(ptr, start_ptr) + reverse_pos(mask)) } else { None } } #[target_feature(enable = "sse2")] unsafe fn reverse_search2( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, vn2: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let eq1 = _mm_cmpeq_epi8(chunk, vn1); let eq2 = _mm_cmpeq_epi8(chunk, vn2); if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { let mask1 = _mm_movemask_epi8(eq1); let mask2 = _mm_movemask_epi8(eq2); Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2)) } else { None } } #[target_feature(enable = "sse2")] unsafe fn reverse_search3( start_ptr: *const u8, end_ptr: *const u8, ptr: *const u8, vn1: __m128i, vn2: __m128i, vn3: __m128i, ) -> Option { debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); debug_assert!(start_ptr <= ptr); debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); let chunk = _mm_loadu_si128(ptr as *const __m128i); let eq1 = _mm_cmpeq_epi8(chunk, vn1); let eq2 = _mm_cmpeq_epi8(chunk, vn2); let eq3 = _mm_cmpeq_epi8(chunk, vn3); let or = _mm_or_si128(eq1, eq2); if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { let mask1 = _mm_movemask_epi8(eq1); let mask2 = _mm_movemask_epi8(eq2); let mask3 = _mm_movemask_epi8(eq3); Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) } else { None } } /// Compute the position of the first matching byte from the given mask. The /// position returned is always in the range [0, 15]. /// /// The mask given is expected to be the result of _mm_movemask_epi8. fn forward_pos(mask: i32) -> usize { // We are dealing with little endian here, where the most significant byte // is at a higher address. That means the least significant bit that is set // corresponds to the position of our first matching byte. That position // corresponds to the number of zeros after the least significant bit. mask.trailing_zeros() as usize } /// Compute the position of the first matching byte from the given masks. The /// position returned is always in the range [0, 15]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm_movemask_epi8, where /// at least one of the masks is non-zero (i.e., indicates a match). fn forward_pos2(mask1: i32, mask2: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0); forward_pos(mask1 | mask2) } /// Compute the position of the first matching byte from the given masks. The /// position returned is always in the range [0, 15]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm_movemask_epi8, where /// at least one of the masks is non-zero (i.e., indicates a match). 
fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); forward_pos(mask1 | mask2 | mask3) } /// Compute the position of the last matching byte from the given mask. The /// position returned is always in the range [0, 15]. /// /// The mask given is expected to be the result of _mm_movemask_epi8. fn reverse_pos(mask: i32) -> usize { // We are dealing with little endian here, where the most significant byte // is at a higher address. That means the most significant bit that is set // corresponds to the position of our last matching byte. The position from // the end of the mask is therefore the number of leading zeros in a 16 // bit integer, and the position from the start of the mask is therefore // 16 - (leading zeros) - 1. VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1 } /// Compute the position of the last matching byte from the given masks. The /// position returned is always in the range [0, 15]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm_movemask_epi8, where /// at least one of the masks is non-zero (i.e., indicates a match). fn reverse_pos2(mask1: i32, mask2: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0); reverse_pos(mask1 | mask2) } /// Compute the position of the last matching byte from the given masks. The /// position returned is always in the range [0, 15]. Each mask corresponds to /// the equality comparison of a single byte. /// /// The masks given are expected to be the result of _mm_movemask_epi8, where /// at least one of the masks is non-zero (i.e., indicates a match). fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); reverse_pos(mask1 | mask2 | mask3) } /// Subtract `b` from `a` and return the difference. `a` should be greater than /// or equal to `b`. fn sub(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memchr/x86/sse42.rs000064400000000000000000000046220072674642500151340ustar 00000000000000// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might // start winning with a hypothetical memchr4 (or greater). This technique might // also be good for exposing searches over ranges of bytes, but that departs // from the standard memchr API, so it's not clear whether we actually want // that or not. // // N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind // of neat. Unfortunately, UTF-8 strings can contain NUL bytes, which means // I don't see a way of effectively using PCMPISTRI unless there's some fast // way to replace zero bytes with a byte that is not not a needle byte. 
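// Note on the string-compare control flags used below (explanatory comment;
// the constant itself is defined just after the imports): CONTROL_ANY
// combines _SIDD_UBYTE_OPS (compare unsigned bytes), _SIDD_CMP_EQUAL_ANY
// (a haystack byte matches if it equals *any* byte of the needle operand),
// _SIDD_POSITIVE_POLARITY (do not negate the result) and
// _SIDD_LEAST_SIGNIFICANT (return the index of the first match). That is
// what lets _mm_cmpestri test a chunk against n1, n2 and n3 in a single
// instruction, returning 16 when no byte matches.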
use core::{arch::x86_64::*, mem::size_of}; use x86::sse2; const VECTOR_SIZE: usize = size_of::<__m128i>(); const CONTROL_ANY: i32 = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_POSITIVE_POLARITY | _SIDD_LEAST_SIGNIFICANT; #[target_feature(enable = "sse4.2")] pub unsafe fn memchr3( n1: u8, n2: u8, n3: u8, haystack: &[u8], ) -> Option { let vn1 = _mm_set1_epi8(n1 as i8); let vn2 = _mm_set1_epi8(n2 as i8); let vn3 = _mm_set1_epi8(n3 as i8); let vn = _mm_setr_epi8( n1 as i8, n2 as i8, n3 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ); let len = haystack.len(); let start_ptr = haystack.as_ptr(); let end_ptr = haystack[haystack.len()..].as_ptr(); let mut ptr = start_ptr; if haystack.len() < VECTOR_SIZE { while ptr < end_ptr { if *ptr == n1 || *ptr == n2 || *ptr == n3 { return Some(sub(ptr, start_ptr)); } ptr = ptr.offset(1); } return None; } while ptr <= end_ptr.sub(VECTOR_SIZE) { let chunk = _mm_loadu_si128(ptr as *const __m128i); let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY); if res < 16 { return Some(sub(ptr, start_ptr) + res as usize); } ptr = ptr.add(VECTOR_SIZE); } if ptr < end_ptr { debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); } None } /// Subtract `b` from `a` and return the difference. `a` should be greater than /// or equal to `b`. fn sub(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memmem/byte_frequencies.rs000064400000000000000000000105170072674642500171050ustar 00000000000000pub const BYTE_FREQUENCIES: [u8; 256] = [ 55, // '\x00' 52, // '\x01' 51, // '\x02' 50, // '\x03' 49, // '\x04' 48, // '\x05' 47, // '\x06' 46, // '\x07' 45, // '\x08' 103, // '\t' 242, // '\n' 66, // '\x0b' 67, // '\x0c' 229, // '\r' 44, // '\x0e' 43, // '\x0f' 42, // '\x10' 41, // '\x11' 40, // '\x12' 39, // '\x13' 38, // '\x14' 37, // '\x15' 36, // '\x16' 35, // '\x17' 34, // '\x18' 33, // '\x19' 56, // '\x1a' 32, // '\x1b' 31, // '\x1c' 30, // '\x1d' 29, // '\x1e' 28, // '\x1f' 255, // ' ' 148, // '!' 164, // '"' 149, // '#' 136, // '$' 160, // '%' 155, // '&' 173, // "'" 221, // '(' 222, // ')' 134, // '*' 122, // '+' 232, // ',' 202, // '-' 215, // '.' 224, // '/' 208, // '0' 220, // '1' 204, // '2' 187, // '3' 183, // '4' 179, // '5' 177, // '6' 168, // '7' 178, // '8' 200, // '9' 226, // ':' 195, // ';' 154, // '<' 184, // '=' 174, // '>' 126, // '?' 
120, // '@' 191, // 'A' 157, // 'B' 194, // 'C' 170, // 'D' 189, // 'E' 162, // 'F' 161, // 'G' 150, // 'H' 193, // 'I' 142, // 'J' 137, // 'K' 171, // 'L' 176, // 'M' 185, // 'N' 167, // 'O' 186, // 'P' 112, // 'Q' 175, // 'R' 192, // 'S' 188, // 'T' 156, // 'U' 140, // 'V' 143, // 'W' 123, // 'X' 133, // 'Y' 128, // 'Z' 147, // '[' 138, // '\\' 146, // ']' 114, // '^' 223, // '_' 151, // '`' 249, // 'a' 216, // 'b' 238, // 'c' 236, // 'd' 253, // 'e' 227, // 'f' 218, // 'g' 230, // 'h' 247, // 'i' 135, // 'j' 180, // 'k' 241, // 'l' 233, // 'm' 246, // 'n' 244, // 'o' 231, // 'p' 139, // 'q' 245, // 'r' 243, // 's' 251, // 't' 235, // 'u' 201, // 'v' 196, // 'w' 240, // 'x' 214, // 'y' 152, // 'z' 182, // '{' 205, // '|' 181, // '}' 127, // '~' 27, // '\x7f' 212, // '\x80' 211, // '\x81' 210, // '\x82' 213, // '\x83' 228, // '\x84' 197, // '\x85' 169, // '\x86' 159, // '\x87' 131, // '\x88' 172, // '\x89' 105, // '\x8a' 80, // '\x8b' 98, // '\x8c' 96, // '\x8d' 97, // '\x8e' 81, // '\x8f' 207, // '\x90' 145, // '\x91' 116, // '\x92' 115, // '\x93' 144, // '\x94' 130, // '\x95' 153, // '\x96' 121, // '\x97' 107, // '\x98' 132, // '\x99' 109, // '\x9a' 110, // '\x9b' 124, // '\x9c' 111, // '\x9d' 82, // '\x9e' 108, // '\x9f' 118, // '\xa0' 141, // '¡' 113, // '¢' 129, // '£' 119, // '¤' 125, // '¥' 165, // '¦' 117, // '§' 92, // '¨' 106, // '©' 83, // 'ª' 72, // '«' 99, // '¬' 93, // '\xad' 65, // '®' 79, // '¯' 166, // '°' 237, // '±' 163, // '²' 199, // '³' 190, // '´' 225, // 'µ' 209, // '¶' 203, // '·' 198, // '¸' 217, // '¹' 219, // 'º' 206, // '»' 234, // '¼' 248, // '½' 158, // '¾' 239, // '¿' 255, // 'À' 255, // 'Á' 255, // 'Â' 255, // 'Ã' 255, // 'Ä' 255, // 'Å' 255, // 'Æ' 255, // 'Ç' 255, // 'È' 255, // 'É' 255, // 'Ê' 255, // 'Ë' 255, // 'Ì' 255, // 'Í' 255, // 'Î' 255, // 'Ï' 255, // 'Ð' 255, // 'Ñ' 255, // 'Ò' 255, // 'Ó' 255, // 'Ô' 255, // 'Õ' 255, // 'Ö' 255, // '×' 255, // 'Ø' 255, // 'Ù' 255, // 'Ú' 255, // 'Û' 255, // 'Ü' 255, // 'Ý' 255, // 'Þ' 255, // 'ß' 255, // 'à' 255, // 'á' 255, // 'â' 255, // 'ã' 255, // 'ä' 255, // 'å' 255, // 'æ' 255, // 'ç' 255, // 'è' 255, // 'é' 255, // 'ê' 255, // 'ë' 255, // 'ì' 255, // 'í' 255, // 'î' 255, // 'ï' 255, // 'ð' 255, // 'ñ' 255, // 'ò' 255, // 'ó' 255, // 'ô' 255, // 'õ' 255, // 'ö' 255, // '÷' 255, // 'ø' 255, // 'ù' 255, // 'ú' 255, // 'û' 255, // 'ü' 255, // 'ý' 255, // 'þ' 255, // 'ÿ' ]; memchr-2.4.1/src/memmem/genericsimd.rs000064400000000000000000000250300072674642500160360ustar 00000000000000use core::mem::size_of; use crate::memmem::{util::memcmp, vector::Vector, NeedleInfo}; /// The minimum length of a needle required for this algorithm. The minimum /// is 2 since a length of 1 should just use memchr and a length of 0 isn't /// a case handled by this searcher. pub(crate) const MIN_NEEDLE_LEN: usize = 2; /// The maximum length of a needle required for this algorithm. /// /// In reality, there is no hard max here. The code below can handle any /// length needle. (Perhaps that suggests there are missing optimizations.) /// Instead, this is a heuristic and a bound guaranteeing our linear time /// complexity. /// /// It is a heuristic because when a candidate match is found, memcmp is run. /// For very large needles with lots of false positives, memcmp can make the /// code run quite slow. /// /// It is a bound because the worst case behavior with memcmp is multiplicative /// in the size of the needle and haystack, and we want to keep that additive. 
/// This bound ensures we still meet that bound theoretically, since it's just /// a constant. We aren't acting in bad faith here, memcmp on tiny needles /// is so fast that even in pathological cases (see pathological vector /// benchmarks), this is still just as fast or faster in practice. /// /// This specific number was chosen by tweaking a bit and running benchmarks. /// The rare-medium-needle, for example, gets about 5% faster by using this /// algorithm instead of a prefilter-accelerated Two-Way. There's also a /// theoretical desire to keep this number reasonably low, to mitigate the /// impact of pathological cases. I did try 64, and some benchmarks got a /// little better, and others (particularly the pathological ones), got a lot /// worse. So... 32 it is? pub(crate) const MAX_NEEDLE_LEN: usize = 32; /// The implementation of the forward vector accelerated substring search. /// /// This is extremely similar to the prefilter vector module by the same name. /// The key difference is that this is not a prefilter. Instead, it handles /// confirming its own matches. The trade off is that this only works with /// smaller needles. The speed up here is that an inlined memcmp on a tiny /// needle is very quick, even on pathological inputs. This is much better than /// combining a prefilter with Two-Way, where using Two-Way to confirm the /// match has higher latency. /// /// So why not use this for all needles? We could, and it would probably work /// really well on most inputs. But its worst case is multiplicative and we /// want to guarantee worst case additive time. Some of the benchmarks try to /// justify this (see the pathological ones). /// /// The prefilter variant of this has more comments. Also note that we only /// implement this for forward searches for now. If you have a compelling use /// case for accelerated reverse search, please file an issue. #[derive(Clone, Copy, Debug)] pub(crate) struct Forward { rare1i: u8, rare2i: u8, } impl Forward { /// Create a new "generic simd" forward searcher. If one could not be /// created from the given inputs, then None is returned. pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option { let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_u8(); // If the needle is too short or too long, give up. Also, give up // if the rare bytes detected are at the same position. (It likely // suggests a degenerate case, although it should technically not be // possible.) if needle.len() < MIN_NEEDLE_LEN || needle.len() > MAX_NEEDLE_LEN || rare1i == rare2i { return None; } Some(Forward { rare1i, rare2i }) } /// Returns the minimum length of haystack that is needed for this searcher /// to work for a particular vector. Passing a haystack with a length /// smaller than this will cause `fwd_find` to panic. #[inline(always)] pub(crate) fn min_haystack_len(&self) -> usize { self.rare2i as usize + size_of::() } } /// Searches the given haystack for the given needle. The needle given should /// be the same as the needle that this searcher was initialized with. /// /// # Panics /// /// When the given haystack has a length smaller than `min_haystack_len`. /// /// # Safety /// /// Since this is meant to be used with vector functions, callers need to /// specialize this inside of a function with a `target_feature` attribute. /// Therefore, callers must ensure that whatever target feature is being used /// supports the vector functions that this function is specialized for. 
(For /// the specific vector functions used, see the Vector trait implementations.) #[inline(always)] pub(crate) unsafe fn fwd_find( fwd: &Forward, haystack: &[u8], needle: &[u8], ) -> Option { // It would be nice if we didn't have this check here, since the meta // searcher should handle it for us. But without this, I don't think we // guarantee that end_ptr.sub(needle.len()) won't result in UB. We could // put it as part of the safety contract, but it makes it more complicated // than necessary. if haystack.len() < needle.len() { return None; } let min_haystack_len = fwd.min_haystack_len::(); assert!(haystack.len() >= min_haystack_len, "haystack too small"); debug_assert!(needle.len() <= haystack.len()); debug_assert!( needle.len() >= MIN_NEEDLE_LEN, "needle must be at least {} bytes", MIN_NEEDLE_LEN, ); debug_assert!( needle.len() <= MAX_NEEDLE_LEN, "needle must be at most {} bytes", MAX_NEEDLE_LEN, ); let (rare1i, rare2i) = (fwd.rare1i as usize, fwd.rare2i as usize); let rare1chunk = V::splat(needle[rare1i]); let rare2chunk = V::splat(needle[rare2i]); let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let max_ptr = end_ptr.sub(min_haystack_len); let mut ptr = start_ptr; // N.B. I did experiment with unrolling the loop to deal with size(V) // bytes at a time and 2*size(V) bytes at a time. The double unroll was // marginally faster while the quadruple unroll was unambiguously slower. // In the end, I decided the complexity from unrolling wasn't worth it. I // used the memmem/krate/prebuilt/huge-en/ benchmarks to compare. while ptr <= max_ptr { let m = fwd_find_in_chunk( fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, !0, ); if let Some(chunki) = m { return Some(matched(start_ptr, ptr, chunki)); } ptr = ptr.add(size_of::()); } if ptr < end_ptr { let remaining = diff(end_ptr, ptr); debug_assert!( remaining < min_haystack_len, "remaining bytes should be smaller than the minimum haystack \ length of {}, but there are {} bytes remaining", min_haystack_len, remaining, ); if remaining < needle.len() { return None; } debug_assert!( max_ptr < ptr, "after main loop, ptr should have exceeded max_ptr", ); let overlap = diff(ptr, max_ptr); debug_assert!( overlap > 0, "overlap ({}) must always be non-zero", overlap, ); debug_assert!( overlap < size_of::(), "overlap ({}) cannot possibly be >= than a vector ({})", overlap, size_of::(), ); // The mask has all of its bits set except for the first N least // significant bits, where N=overlap. This way, any matches that // occur in find_in_chunk within the overlap are automatically // ignored. let mask = !((1 << overlap) - 1); ptr = max_ptr; let m = fwd_find_in_chunk( fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, mask, ); if let Some(chunki) = m { return Some(matched(start_ptr, ptr, chunki)); } } None } /// Search for an occurrence of two rare bytes from the needle in the chunk /// pointed to by ptr, with the end of the haystack pointed to by end_ptr. When /// an occurrence is found, memcmp is run to check if a match occurs at the /// corresponding position. /// /// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 /// bytes repeated in each 8-bit lane, respectively. /// /// mask should have bits set corresponding the positions in the chunk in which /// matches are considered. This is only used for the last vector load where /// the beginning of the vector might have overlapped with the last load in /// the main loop. 
The mask lets us avoid visiting positions that have already /// been discarded as matches. /// /// # Safety /// /// It must be safe to do an unaligned read of size(V) bytes starting at both /// (ptr + rare1i) and (ptr + rare2i). It must also be safe to do unaligned /// loads on ptr up to (end_ptr - needle.len()). #[inline(always)] unsafe fn fwd_find_in_chunk( fwd: &Forward, needle: &[u8], ptr: *const u8, end_ptr: *const u8, rare1chunk: V, rare2chunk: V, mask: u32, ) -> Option { let chunk0 = V::load_unaligned(ptr.add(fwd.rare1i as usize)); let chunk1 = V::load_unaligned(ptr.add(fwd.rare2i as usize)); let eq0 = chunk0.cmpeq(rare1chunk); let eq1 = chunk1.cmpeq(rare2chunk); let mut match_offsets = eq0.and(eq1).movemask() & mask; while match_offsets != 0 { let offset = match_offsets.trailing_zeros() as usize; let ptr = ptr.add(offset); if end_ptr.sub(needle.len()) < ptr { return None; } let chunk = core::slice::from_raw_parts(ptr, needle.len()); if memcmp(needle, chunk) { return Some(offset); } match_offsets &= match_offsets - 1; } None } /// Accepts a chunk-relative offset and returns a haystack relative offset /// after updating the prefilter state. /// /// See the same function with the same name in the prefilter variant of this /// algorithm to learned why it's tagged with inline(never). Even here, where /// the function is simpler, inlining it leads to poorer codegen. (Although /// it does improve some benchmarks, like prebuiltiter/huge-en/common-you.) #[cold] #[inline(never)] fn matched(start_ptr: *const u8, ptr: *const u8, chunki: usize) -> usize { diff(ptr, start_ptr) + chunki } /// Subtract `b` from `a` and return the difference. `a` must be greater than /// or equal to `b`. fn diff(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memmem/mod.rs000064400000000000000000001254600072674642500143340ustar 00000000000000/*! This module provides forward and reverse substring search routines. Unlike the standard library's substring search routines, these work on arbitrary bytes. For all non-empty needles, these routines will report exactly the same values as the corresponding routines in the standard library. For the empty needle, the standard library reports matches only at valid UTF-8 boundaries, where as these routines will report matches at every position. Other than being able to work on arbitrary bytes, the primary reason to prefer these routines over the standard library routines is that these will generally be faster. In some cases, significantly so. # Example: iterating over substring matches This example shows how to use [`find_iter`] to find occurrences of a substring in a haystack. ``` use memchr::memmem; let haystack = b"foo bar foo baz foo"; let mut it = memmem::find_iter(haystack, "foo"); assert_eq!(Some(0), it.next()); assert_eq!(Some(8), it.next()); assert_eq!(Some(16), it.next()); assert_eq!(None, it.next()); ``` # Example: iterating over substring matches in reverse This example shows how to use [`rfind_iter`] to find occurrences of a substring in a haystack starting from the end of the haystack. **NOTE:** This module does not implement double ended iterators, so reverse searches aren't done by calling `rev` on a forward iterator. 
``` use memchr::memmem; let haystack = b"foo bar foo baz foo"; let mut it = memmem::rfind_iter(haystack, "foo"); assert_eq!(Some(16), it.next()); assert_eq!(Some(8), it.next()); assert_eq!(Some(0), it.next()); assert_eq!(None, it.next()); ``` # Example: repeating a search for the same needle It may be possible for the overhead of constructing a substring searcher to be measurable in some workloads. In cases where the same needle is used to search many haystacks, it is possible to do construction once and thus to avoid it for subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for reverse searches). ``` use memchr::memmem; let finder = memmem::Finder::new("foo"); assert_eq!(Some(4), finder.find(b"baz foo quux")); assert_eq!(None, finder.find(b"quux baz bar")); ``` */ pub use self::prefilter::Prefilter; use crate::{ cow::CowBytes, memmem::{ prefilter::{Pre, PrefilterFn, PrefilterState}, rabinkarp::NeedleHash, rarebytes::RareNeedleBytes, }, }; /// Defines a suite of quickcheck properties for forward and reverse /// substring searching. /// /// This is defined in this specific spot so that it can be used freely among /// the different substring search implementations. I couldn't be bothered to /// fight with the macro-visibility rules enough to figure out how to stuff it /// somewhere more convenient. #[cfg(all(test, feature = "std"))] macro_rules! define_memmem_quickcheck_tests { ($fwd:expr, $rev:expr) => { use crate::memmem::proptests; quickcheck::quickcheck! { fn qc_fwd_prefix_is_substring(bs: Vec) -> bool { proptests::prefix_is_substring(false, &bs, $fwd) } fn qc_fwd_suffix_is_substring(bs: Vec) -> bool { proptests::suffix_is_substring(false, &bs, $fwd) } fn qc_fwd_matches_naive( haystack: Vec, needle: Vec ) -> bool { proptests::matches_naive(false, &haystack, &needle, $fwd) } fn qc_rev_prefix_is_substring(bs: Vec) -> bool { proptests::prefix_is_substring(true, &bs, $rev) } fn qc_rev_suffix_is_substring(bs: Vec) -> bool { proptests::suffix_is_substring(true, &bs, $rev) } fn qc_rev_matches_naive( haystack: Vec, needle: Vec ) -> bool { proptests::matches_naive(true, &haystack, &needle, $rev) } } }; } /// Defines a suite of "simple" hand-written tests for a substring /// implementation. /// /// This is defined here for the same reason that /// define_memmem_quickcheck_tests is defined here. #[cfg(test)] macro_rules! define_memmem_simple_tests { ($fwd:expr, $rev:expr) => { use crate::memmem::testsimples; #[test] fn simple_forward() { testsimples::run_search_tests_fwd($fwd); } #[test] fn simple_reverse() { testsimples::run_search_tests_rev($rev); } }; } mod byte_frequencies; #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))] mod genericsimd; mod prefilter; mod rabinkarp; mod rarebytes; mod twoway; mod util; // SIMD is only supported on x86_64 currently. #[cfg(target_arch = "x86_64")] mod vector; #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] mod x86; /// Returns an iterator over all occurrences of a substring in a haystack. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. 
/// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem; /// /// let haystack = b"foo bar foo baz foo"; /// let mut it = memmem::find_iter(haystack, b"foo"); /// assert_eq!(Some(0), it.next()); /// assert_eq!(Some(8), it.next()); /// assert_eq!(Some(16), it.next()); /// assert_eq!(None, it.next()); /// ``` #[inline] pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindIter<'h, 'n> { FindIter::new(haystack, Finder::new(needle)) } /// Returns a reverse iterator over all occurrences of a substring in a /// haystack. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem; /// /// let haystack = b"foo bar foo baz foo"; /// let mut it = memmem::rfind_iter(haystack, b"foo"); /// assert_eq!(Some(16), it.next()); /// assert_eq!(Some(8), it.next()); /// assert_eq!(Some(0), it.next()); /// assert_eq!(None, it.next()); /// ``` #[inline] pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindRevIter<'h, 'n> { FindRevIter::new(haystack, FinderRev::new(needle)) } /// Returns the index of the first occurrence of the given needle. /// /// Note that if you're are searching for the same needle in many different /// small haystacks, it may be faster to initialize a [`Finder`] once, /// and reuse it for each search. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem; /// /// let haystack = b"foo bar baz"; /// assert_eq!(Some(0), memmem::find(haystack, b"foo")); /// assert_eq!(Some(4), memmem::find(haystack, b"bar")); /// assert_eq!(None, memmem::find(haystack, b"quux")); /// ``` #[inline] pub fn find(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { rabinkarp::find(haystack, needle) } else { Finder::new(needle).find(haystack) } } /// Returns the index of the last occurrence of the given needle. /// /// Note that if you're are searching for the same needle in many different /// small haystacks, it may be faster to initialize a [`FinderRev`] once, /// and reuse it for each search. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. 
/// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem; /// /// let haystack = b"foo bar baz"; /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo")); /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar")); /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba")); /// assert_eq!(None, memmem::rfind(haystack, b"quux")); /// ``` #[inline] pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { rabinkarp::rfind(haystack, needle) } else { FinderRev::new(needle).rfind(haystack) } } /// An iterator over non-overlapping substring matches. /// /// Matches are reported by the byte offset at which they begin. /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. #[derive(Debug)] pub struct FindIter<'h, 'n> { haystack: &'h [u8], prestate: PrefilterState, finder: Finder<'n>, pos: usize, } impl<'h, 'n> FindIter<'h, 'n> { #[inline(always)] pub(crate) fn new( haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n> { let prestate = finder.searcher.prefilter_state(); FindIter { haystack, prestate, finder, pos: 0 } } } impl<'h, 'n> Iterator for FindIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option { if self.pos > self.haystack.len() { return None; } let result = self .finder .searcher .find(&mut self.prestate, &self.haystack[self.pos..]); match result { None => None, Some(i) => { let pos = self.pos + i; self.pos = pos + core::cmp::max(1, self.finder.needle().len()); Some(pos) } } } } /// An iterator over non-overlapping substring matches in reverse. /// /// Matches are reported by the byte offset at which they begin. /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. #[derive(Debug)] pub struct FindRevIter<'h, 'n> { haystack: &'h [u8], finder: FinderRev<'n>, /// When searching with an empty needle, this gets set to `None` after /// we've yielded the last element at `0`. pos: Option, } impl<'h, 'n> FindRevIter<'h, 'n> { #[inline(always)] pub(crate) fn new( haystack: &'h [u8], finder: FinderRev<'n>, ) -> FindRevIter<'h, 'n> { let pos = Some(haystack.len()); FindRevIter { haystack, finder, pos } } } impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option { let pos = match self.pos { None => return None, Some(pos) => pos, }; let result = self.finder.rfind(&self.haystack[..pos]); match result { None => None, Some(i) => { if pos == i { self.pos = pos.checked_sub(1); } else { self.pos = Some(i); } Some(i) } } } } /// A single substring searcher fixed to a particular needle. /// /// The purpose of this type is to permit callers to construct a substring /// searcher that can be used to search haystacks without the overhead of /// constructing the searcher in the first place. This is a somewhat niche /// concern when it's necessary to re-use the same needle to search multiple /// different haystacks with as little overhead as possible. In general, using /// [`find`] is good enough, but `Finder` is useful when you can meaningfully /// observe searcher construction time in a profile. /// /// When the `std` feature is enabled, then this type has an `into_owned` /// version which permits building a `Finder` that is not connected to /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct Finder<'n> { searcher: Searcher<'n>, } impl<'n> Finder<'n> { /// Create a new finder for the given needle. 
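    ///
    /// # Examples
    ///
    /// Basic usage (this mirrors the module-level example):
    ///
    /// ```
    /// use memchr::memmem::Finder;
    ///
    /// let finder = Finder::new("foo");
    /// assert_eq!(Some(4), finder.find(b"baz foo quux"));
    /// assert_eq!(None, finder.find(b"quux baz bar"));
    /// ```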
#[inline] pub fn new>(needle: &'n B) -> Finder<'n> { FinderBuilder::new().build_forward(needle) } /// Returns the index of the first occurrence of this needle in the given /// haystack. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem::Finder; /// /// let haystack = b"foo bar baz"; /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); /// assert_eq!(None, Finder::new("quux").find(haystack)); /// ``` pub fn find(&self, haystack: &[u8]) -> Option { self.searcher.find(&mut self.searcher.prefilter_state(), haystack) } /// Returns an iterator over all occurrences of a substring in a haystack. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem::Finder; /// /// let haystack = b"foo bar foo baz foo"; /// let finder = Finder::new(b"foo"); /// let mut it = finder.find_iter(haystack); /// assert_eq!(Some(0), it.next()); /// assert_eq!(Some(8), it.next()); /// assert_eq!(Some(16), it.next()); /// assert_eq!(None, it.next()); /// ``` #[inline] pub fn find_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindIter<'h, 'a> { FindIter::new(haystack, self.as_ref()) } /// Convert this finder into its owned variant, such that it no longer /// borrows the needle. /// /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// /// This is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline] pub fn into_owned(self) -> Finder<'static> { Finder { searcher: self.searcher.into_owned() } } /// Convert this finder into its borrowed variant. /// /// This is primarily useful if your finder is owned and you'd like to /// store its borrowed variant in some intermediate data structure. /// /// Note that the lifetime parameter of the returned finder is tied to the /// lifetime of `self`, and may be shorter than the `'n` lifetime of the /// needle itself. Namely, a finder's needle can be either borrowed or /// owned, so the lifetime of the needle returned must necessarily be the /// shorter of the two. #[inline] pub fn as_ref(&self) -> Finder<'_> { Finder { searcher: self.searcher.as_ref() } } /// Returns the needle that this finder searches for. /// /// Note that the lifetime of the needle returned is tied to the lifetime /// of the finder, and may be shorter than the `'n` lifetime. Namely, a /// finder's needle can be either borrowed or owned, so the lifetime of the /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { self.searcher.needle() } } /// A single substring reverse searcher fixed to a particular needle. /// /// The purpose of this type is to permit callers to construct a substring /// searcher that can be used to search haystacks without the overhead of /// constructing the searcher in the first place. 
This is a somewhat niche /// concern when it's necessary to re-use the same needle to search multiple /// different haystacks with as little overhead as possible. In general, /// using [`rfind`] is good enough, but `FinderRev` is useful when you can /// meaningfully observe searcher construction time in a profile. /// /// When the `std` feature is enabled, then this type has an `into_owned` /// version which permits building a `FinderRev` that is not connected to /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct FinderRev<'n> { searcher: SearcherRev<'n>, } impl<'n> FinderRev<'n> { /// Create a new reverse finder for the given needle. #[inline] pub fn new>(needle: &'n B) -> FinderRev<'n> { FinderBuilder::new().build_reverse(needle) } /// Returns the index of the last occurrence of this needle in the given /// haystack. /// /// The haystack may be any type that can be cheaply converted into a /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem::FinderRev; /// /// let haystack = b"foo bar baz"; /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); /// ``` pub fn rfind>(&self, haystack: B) -> Option { self.searcher.rfind(haystack.as_ref()) } /// Returns a reverse iterator over all occurrences of a substring in a /// haystack. /// /// # Complexity /// /// This routine is guaranteed to have worst case linear time complexity /// with respect to both the needle and the haystack. That is, this runs /// in `O(needle.len() + haystack.len())` time. /// /// This routine is also guaranteed to have worst case constant space /// complexity. /// /// # Examples /// /// Basic usage: /// /// ``` /// use memchr::memmem::FinderRev; /// /// let haystack = b"foo bar foo baz foo"; /// let finder = FinderRev::new(b"foo"); /// let mut it = finder.rfind_iter(haystack); /// assert_eq!(Some(16), it.next()); /// assert_eq!(Some(8), it.next()); /// assert_eq!(Some(0), it.next()); /// assert_eq!(None, it.next()); /// ``` #[inline] pub fn rfind_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindRevIter<'h, 'a> { FindRevIter::new(haystack, self.as_ref()) } /// Convert this finder into its owned variant, such that it no longer /// borrows the needle. /// /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// /// This is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline] pub fn into_owned(self) -> FinderRev<'static> { FinderRev { searcher: self.searcher.into_owned() } } /// Convert this finder into its borrowed variant. /// /// This is primarily useful if your finder is owned and you'd like to /// store its borrowed variant in some intermediate data structure. /// /// Note that the lifetime parameter of the returned finder is tied to the /// lifetime of `self`, and may be shorter than the `'n` lifetime of the /// needle itself. Namely, a finder's needle can be either borrowed or /// owned, so the lifetime of the needle returned must necessarily be the /// shorter of the two. 
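    ///
    /// # Examples
    ///
    /// A sketch of the intended use: keep the borrowed variant of an owned
    /// finder around (this example relies on the `std` feature for
    /// `into_owned`):
    ///
    /// ```
    /// use memchr::memmem::FinderRev;
    ///
    /// let owned: FinderRev<'static> = FinderRev::new("foo").into_owned();
    /// let borrowed: FinderRev<'_> = owned.as_ref();
    /// assert_eq!(owned.needle(), borrowed.needle());
    /// ```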
#[inline] pub fn as_ref(&self) -> FinderRev<'_> { FinderRev { searcher: self.searcher.as_ref() } } /// Returns the needle that this finder searches for. /// /// Note that the lifetime of the needle returned is tied to the lifetime /// of the finder, and may be shorter than the `'n` lifetime. Namely, a /// finder's needle can be either borrowed or owned, so the lifetime of the /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { self.searcher.needle() } } /// A builder for constructing non-default forward or reverse memmem finders. /// /// A builder is primarily useful for configuring a substring searcher. /// Currently, the only configuration exposed is the ability to disable /// heuristic prefilters used to speed up certain searches. #[derive(Clone, Debug, Default)] pub struct FinderBuilder { config: SearcherConfig, } impl FinderBuilder { /// Create a new finder builder with default settings. pub fn new() -> FinderBuilder { FinderBuilder::default() } /// Build a forward finder using the given needle from the current /// settings. pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> Finder<'n> { Finder { searcher: Searcher::new(self.config, needle.as_ref()) } } /// Build a reverse finder using the given needle from the current /// settings. pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> FinderRev<'n> { FinderRev { searcher: SearcherRev::new(needle.as_ref()) } } /// Configure the prefilter setting for the finder. /// /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { self.config.prefilter = prefilter; self } } /// The internal implementation of a forward substring searcher. /// /// The reality is that this is a "meta" searcher. Namely, depending on a /// variety of parameters (CPU support, target, needle size, haystack size and /// even dynamic properties such as prefilter effectiveness), the actual /// algorithm employed to do substring search may change. #[derive(Clone, Debug)] struct Searcher<'n> { /// The actual needle we're searching for. /// /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is /// specialized to a single variant (the borrowed form). needle: CowBytes<'n>, /// A collection of facts computed on the needle that are useful for more /// than one substring search algorithm. ninfo: NeedleInfo, /// A prefilter function, if it was deemed appropriate. /// /// Some substring search implementations (like Two-Way) benefit greatly /// if we can quickly find candidate starting positions for a match. prefn: Option, /// The actual substring implementation in use. kind: SearcherKind, } /// A collection of facts computed about a search needle. /// /// We group these things together because it's useful to be able to hand them /// to prefilters or substring algorithms that want them. #[derive(Clone, Copy, Debug)] pub(crate) struct NeedleInfo { /// The offsets of "rare" bytes detected in the needle. /// /// This is meant to be a heuristic in order to maximize the effectiveness /// of vectorized code. Namely, vectorized code tends to focus on only /// one or two bytes. If we pick bytes from the needle that occur /// infrequently, then more time will be spent in the vectorized code and /// will likely make the overall search (much) faster. 
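    ///
    /// For example, for a needle like `b"Sherlock"`, the background frequency
    /// distribution will tend to rank bytes like `b'S'` and `b'k'` as much
    /// rarer than `b'e'` or `b'r'`, so candidate detection keys off of those
    /// instead of simply using the first byte of the needle.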
/// /// Of course, this is only a heuristic based on a background frequency /// distribution of bytes. But it tends to work very well in practice. pub(crate) rarebytes: RareNeedleBytes, /// A Rabin-Karp hash of the needle. /// /// This is store here instead of in a more specific Rabin-Karp search /// since Rabin-Karp may be used even if another SearchKind corresponds /// to some other search implementation. e.g., If measurements suggest RK /// is faster in some cases or if a search implementation can't handle /// particularly small haystack. (Moreover, we cannot use RK *generally*, /// since its worst case time is multiplicative. Instead, we only use it /// some small haystacks, where "small" is a constant.) pub(crate) nhash: NeedleHash, } /// Configuration for substring search. #[derive(Clone, Copy, Debug, Default)] struct SearcherConfig { /// This permits changing the behavior of the prefilter, since it can have /// a variable impact on performance. prefilter: Prefilter, } #[derive(Clone, Debug)] enum SearcherKind { /// A special case for empty needles. An empty needle always matches, even /// in an empty haystack. Empty, /// This is used whenever the needle is a single byte. In this case, we /// always use memchr. OneByte(u8), /// Two-Way is the generic work horse and is what provides our additive /// linear time guarantee. In general, it's used when the needle is bigger /// than 8 bytes or so. TwoWay(twoway::Forward), #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] GenericSIMD128(x86::sse::Forward), #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] GenericSIMD256(x86::avx::Forward), } impl<'n> Searcher<'n> { #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { use self::SearcherKind::*; let ninfo = NeedleInfo::new(needle); let prefn = prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle); let kind = if needle.len() == 0 { Empty } else if needle.len() == 1 { OneByte(needle[0]) } else if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { GenericSIMD256(fwd) } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { GenericSIMD128(fwd) } else { TwoWay(twoway::Forward::new(needle)) }; Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } } #[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))] fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { use self::SearcherKind::*; let ninfo = NeedleInfo::new(needle); let prefn = prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle); let kind = if needle.len() == 0 { Empty } else if needle.len() == 1 { OneByte(needle[0]) } else { TwoWay(twoway::Forward::new(needle)) }; Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } } /// Return a fresh prefilter state that can be used with this searcher. /// A prefilter state is used to track the effectiveness of a searcher's /// prefilter for speeding up searches. Therefore, the prefilter state /// should generally be reused on subsequent searches (such as in an /// iterator). For searches on a different haystack, then a new prefilter /// state should be used. /// /// This always initializes a valid (but possibly inert) prefilter state /// even if this searcher does not have a prefilter enabled. 
fn prefilter_state(&self) -> PrefilterState { if self.prefn.is_none() { PrefilterState::inert() } else { PrefilterState::new() } } fn needle(&self) -> &[u8] { self.needle.as_slice() } fn as_ref(&self) -> Searcher<'_> { use self::SearcherKind::*; let kind = match self.kind { Empty => Empty, OneByte(b) => OneByte(b), TwoWay(tw) => TwoWay(tw), #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD128(gs) => GenericSIMD128(gs), #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD256(gs) => GenericSIMD256(gs), }; Searcher { needle: CowBytes::new(self.needle()), ninfo: self.ninfo, prefn: self.prefn, kind, } } #[cfg(feature = "std")] fn into_owned(self) -> Searcher<'static> { use self::SearcherKind::*; let kind = match self.kind { Empty => Empty, OneByte(b) => OneByte(b), TwoWay(tw) => TwoWay(tw), #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD128(gs) => GenericSIMD128(gs), #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD256(gs) => GenericSIMD256(gs), }; Searcher { needle: self.needle.into_owned(), ninfo: self.ninfo, prefn: self.prefn, kind, } } /// Implements forward substring search by selecting the implementation /// chosen at construction and executing it on the given haystack with the /// prefilter's current state of effectiveness. #[inline(always)] fn find( &self, state: &mut PrefilterState, haystack: &[u8], ) -> Option { use self::SearcherKind::*; let needle = self.needle(); if haystack.len() < needle.len() { return None; } match self.kind { Empty => Some(0), OneByte(b) => crate::memchr(b, haystack), TwoWay(ref tw) => { // For very short haystacks (e.g., where the prefilter probably // can't run), it's faster to just run RK. if rabinkarp::is_fast(haystack, needle) { rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) } else { self.find_tw(tw, state, haystack, needle) } } #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD128(ref gs) => { // The SIMD matcher can't handle particularly short haystacks, // so we fall back to RK in these cases. if haystack.len() < gs.min_haystack_len() { rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) } else { gs.find(haystack, needle) } } #[cfg(all( not(miri), target_arch = "x86_64", memchr_runtime_simd ))] GenericSIMD256(ref gs) => { // The SIMD matcher can't handle particularly short haystacks, // so we fall back to RK in these cases. if haystack.len() < gs.min_haystack_len() { rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) } else { gs.find(haystack, needle) } } } } /// Calls Two-Way on the given haystack/needle. /// /// This is marked as unlineable since it seems to have a better overall /// effect on benchmarks. However, this is one of those cases where /// inlining it results an improvement in other benchmarks too, so I /// suspect we just don't have enough data yet to make the right call here. /// /// I suspect the main problem is that this function contains two different /// inlined copies of Two-Way: one with and one without prefilters enabled. #[inline(never)] fn find_tw( &self, tw: &twoway::Forward, state: &mut PrefilterState, haystack: &[u8], needle: &[u8], ) -> Option { if let Some(prefn) = self.prefn { // We used to look at the length of a haystack here. That is, if // it was too small, then don't bother with the prefilter. 
But two // things changed: the prefilter falls back to memchr for small // haystacks, and, above, Rabin-Karp is employed for tiny haystacks // anyway. if state.is_effective() { let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; return tw.find(Some(&mut pre), haystack, needle); } } tw.find(None, haystack, needle) } } impl NeedleInfo { pub(crate) fn new(needle: &[u8]) -> NeedleInfo { NeedleInfo { rarebytes: RareNeedleBytes::forward(needle), nhash: NeedleHash::forward(needle), } } } /// The internal implementation of a reverse substring searcher. /// /// See the forward searcher docs for more details. Currently, the reverse /// searcher is considerably simpler since it lacks prefilter support. This /// was done because it adds a lot of code, and more surface area to test. And /// in particular, it's not clear whether a prefilter on reverse searching is /// worth it. (If you have a compelling use case, please file an issue!) #[derive(Clone, Debug)] struct SearcherRev<'n> { /// The actual needle we're searching for. needle: CowBytes<'n>, /// A Rabin-Karp hash of the needle. nhash: NeedleHash, /// The actual substring implementation in use. kind: SearcherRevKind, } #[derive(Clone, Debug)] enum SearcherRevKind { /// A special case for empty needles. An empty needle always matches, even /// in an empty haystack. Empty, /// This is used whenever the needle is a single byte. In this case, we /// always use memchr. OneByte(u8), /// Two-Way is the generic work horse and is what provides our additive /// linear time guarantee. In general, it's used when the needle is bigger /// than 8 bytes or so. TwoWay(twoway::Reverse), } impl<'n> SearcherRev<'n> { fn new(needle: &'n [u8]) -> SearcherRev<'n> { use self::SearcherRevKind::*; let kind = if needle.len() == 0 { Empty } else if needle.len() == 1 { OneByte(needle[0]) } else { TwoWay(twoway::Reverse::new(needle)) }; SearcherRev { needle: CowBytes::new(needle), nhash: NeedleHash::reverse(needle), kind, } } fn needle(&self) -> &[u8] { self.needle.as_slice() } fn as_ref(&self) -> SearcherRev<'_> { use self::SearcherRevKind::*; let kind = match self.kind { Empty => Empty, OneByte(b) => OneByte(b), TwoWay(tw) => TwoWay(tw), }; SearcherRev { needle: CowBytes::new(self.needle()), nhash: self.nhash, kind, } } #[cfg(feature = "std")] fn into_owned(self) -> SearcherRev<'static> { use self::SearcherRevKind::*; let kind = match self.kind { Empty => Empty, OneByte(b) => OneByte(b), TwoWay(tw) => TwoWay(tw), }; SearcherRev { needle: self.needle.into_owned(), nhash: self.nhash, kind, } } /// Implements reverse substring search by selecting the implementation /// chosen at construction and executing it on the given haystack with the /// prefilter's current state of effectiveness. #[inline(always)] fn rfind(&self, haystack: &[u8]) -> Option { use self::SearcherRevKind::*; let needle = self.needle(); if haystack.len() < needle.len() { return None; } match self.kind { Empty => Some(haystack.len()), OneByte(b) => crate::memrchr(b, haystack), TwoWay(ref tw) => { // For very short haystacks (e.g., where the prefilter probably // can't run), it's faster to just run RK. if rabinkarp::is_fast(haystack, needle) { rabinkarp::rfind_with(&self.nhash, haystack, needle) } else { tw.rfind(haystack, needle) } } } } } /// This module defines some generic quickcheck properties useful for testing /// any substring search algorithm. It also runs those properties for the /// top-level public API memmem routines. 
(The properties are also used to /// test various substring search implementations more granularly elsewhere as /// well.) #[cfg(all(test, feature = "std", not(miri)))] mod proptests { // N.B. This defines the quickcheck tests using the properties defined // below. Because of macro-visibility weirdness, the actual macro is // defined at the top of this file. define_memmem_quickcheck_tests!(super::find, super::rfind); /// Check that every prefix of the given byte string is a substring. pub(crate) fn prefix_is_substring( reverse: bool, bs: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option, ) -> bool { if bs.is_empty() { return true; } for i in 0..(bs.len() - 1) { let prefix = &bs[..i]; if reverse { assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); } else { assert_eq!(naive_find(bs, prefix), search(bs, prefix)); } } true } /// Check that every suffix of the given byte string is a substring. pub(crate) fn suffix_is_substring( reverse: bool, bs: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option, ) -> bool { if bs.is_empty() { return true; } for i in 0..(bs.len() - 1) { let suffix = &bs[i..]; if reverse { assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); } else { assert_eq!(naive_find(bs, suffix), search(bs, suffix)); } } true } /// Check that naive substring search matches the result of the given search /// algorithm. pub(crate) fn matches_naive( reverse: bool, haystack: &[u8], needle: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option, ) -> bool { if reverse { naive_rfind(haystack, needle) == search(haystack, needle) } else { naive_find(haystack, needle) == search(haystack, needle) } } /// Naively search forwards for the given needle in the given haystack. fn naive_find(haystack: &[u8], needle: &[u8]) -> Option { if needle.is_empty() { return Some(0); } else if haystack.len() < needle.len() { return None; } for i in 0..(haystack.len() - needle.len() + 1) { if needle == &haystack[i..i + needle.len()] { return Some(i); } } None } /// Naively search in reverse for the given needle in the given haystack. fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option { if needle.is_empty() { return Some(haystack.len()); } else if haystack.len() < needle.len() { return None; } for i in (0..(haystack.len() - needle.len() + 1)).rev() { if needle == &haystack[i..i + needle.len()] { return Some(i); } } None } } /// This module defines some hand-written "simple" substring tests. It /// also provides routines for easily running them on any substring search /// implementation. #[cfg(test)] mod testsimples { define_memmem_simple_tests!(super::find, super::rfind); /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. 
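    /// For example, the entry `("ab", "aab", Some(1), Some(1))` says that in
    /// the haystack `"aab"`, a forward search for `"ab"` finds its first
    /// occurrence at offset 1 and a reverse search finds its last occurrence
    /// at offset 1 as well.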
type SearchTest = (&'static str, &'static str, Option, Option); const SEARCH_TESTS: &'static [SearchTest] = &[ ("", "", Some(0), Some(0)), ("", "a", Some(0), Some(1)), ("", "ab", Some(0), Some(2)), ("", "abc", Some(0), Some(3)), ("a", "", None, None), ("a", "a", Some(0), Some(0)), ("a", "aa", Some(0), Some(1)), ("a", "ba", Some(1), Some(1)), ("a", "bba", Some(2), Some(2)), ("a", "bbba", Some(3), Some(3)), ("a", "bbbab", Some(3), Some(3)), ("a", "bbbabb", Some(3), Some(3)), ("a", "bbbabbb", Some(3), Some(3)), ("a", "bbbbbb", None, None), ("ab", "", None, None), ("ab", "a", None, None), ("ab", "b", None, None), ("ab", "ab", Some(0), Some(0)), ("ab", "aab", Some(1), Some(1)), ("ab", "aaab", Some(2), Some(2)), ("ab", "abaab", Some(0), Some(3)), ("ab", "baaab", Some(3), Some(3)), ("ab", "acb", None, None), ("ab", "abba", Some(0), Some(0)), ("abc", "ab", None, None), ("abc", "abc", Some(0), Some(0)), ("abc", "abcz", Some(0), Some(0)), ("abc", "abczz", Some(0), Some(0)), ("abc", "zabc", Some(1), Some(1)), ("abc", "zzabc", Some(2), Some(2)), ("abc", "azbc", None, None), ("abc", "abzc", None, None), ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), // Failures caught by quickcheck. ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), ]; /// Run the substring search tests. `search` should be a closure that /// accepts a haystack and a needle and returns the starting position /// of the first occurrence of needle in the haystack, or `None` if one /// doesn't exist. pub(crate) fn run_search_tests_fwd( mut search: impl FnMut(&[u8], &[u8]) -> Option, ) { for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { let (n, h) = (needle.as_bytes(), haystack.as_bytes()); assert_eq!( expected_fwd, search(h, n), "needle: {:?}, haystack: {:?}, expected: {:?}", n, h, expected_fwd ); } } /// Run the substring search tests. `search` should be a closure that /// accepts a haystack and a needle and returns the starting position of /// the last occurrence of needle in the haystack, or `None` if one doesn't /// exist. pub(crate) fn run_search_tests_rev( mut search: impl FnMut(&[u8], &[u8]) -> Option, ) { for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { let (n, h) = (needle.as_bytes(), haystack.as_bytes()); assert_eq!( expected_rev, search(h, n), "needle: {:?}, haystack: {:?}, expected: {:?}", n, h, expected_rev ); } } } memchr-2.4.1/src/memmem/prefilter/fallback.rs000064400000000000000000000113110072674642500172750ustar 00000000000000/* This module implements a "fallback" prefilter that only relies on memchr to function. While memchr works best when it's explicitly vectorized, its fallback implementations are fast enough to make a prefilter like this worthwhile. The essence of this implementation is to identify two rare bytes in a needle based on a background frequency distribution of bytes. We then run memchr on the rarer byte. For each match, we use the second rare byte as a guard to quickly check if a match is possible. If the position passes the guard test, then we do a naive memcmp to confirm the match. In practice, this formulation works amazingly well, primarily because of the heuristic use of a background frequency distribution. However, it does have a number of weaknesses where it can get quite slow when its background frequency distribution doesn't line up with the haystack being searched. 
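To make the shape of the loop concrete, here is an illustrative sketch of the
idea described above (this is not the code below; the alignment check and the
prefilter effectiveness bookkeeping are omitted):

    // Illustrative only: rare1/rare2 are the two heuristically rare bytes
    // and rare1i/rare2i are their offsets within the needle.
    let mut i = 0;
    while let Some(found) = memchr(rare1, &haystack[i..]) {
        i += found;
        // Guard: the second rare byte must appear at its expected offset.
        if haystack.get(i - rare1i + rare2i) == Some(&rare2) {
            // Report a candidate at the would-be start of the match. The
            // caller confirms it (e.g., with a memcmp against the needle).
            return Some(i - rare1i);
        }
        i += 1;
    }
    return None;

The weak spot is exactly the one mentioned above: if the "rare" bytes chosen
from the needle happen to be common in the particular haystack, the guard
stops filtering anything out and we pay for a candidate check on nearly every
occurrence of rare1.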
This is why we have specialized vector routines that essentially take this idea and move the guard check into vectorized code. (Those specialized vector routines do still make use of the background frequency distribution of bytes though.) This fallback implementation was originally formulated in regex many moons ago: https://github.com/rust-lang/regex/blob/3db8722d0b204a85380fe2a65e13d7065d7dd968/src/literal/imp.rs#L370-L501 Prior to that, I'm not aware of anyone using this technique in any prominent substring search implementation. Although, I'm sure folks have had this same insight long before me. Another version of this also appeared in bstr: https://github.com/BurntSushi/bstr/blob/a444256ca7407fe180ee32534688549655b7a38e/src/search/prefilter.rs#L83-L340 */ use crate::memmem::{ prefilter::{PrefilterFnTy, PrefilterState}, NeedleInfo, }; // Check that the functions below satisfy the Prefilter function type. const _: PrefilterFnTy = find; /// Look for a possible occurrence of needle. The position returned /// corresponds to the beginning of the occurrence, if one exists. /// /// Callers may assume that this never returns false negatives (i.e., it /// never misses an actual occurrence), but must check that the returned /// position corresponds to a match. That is, it can return false /// positives. /// /// This should only be used when Freqy is constructed for forward /// searching. pub(crate) fn find( prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option { let mut i = 0; let (rare1i, rare2i) = ninfo.rarebytes.as_rare_usize(); let (rare1, rare2) = ninfo.rarebytes.as_rare_bytes(needle); while prestate.is_effective() { // Use a fast vectorized implementation to skip to the next // occurrence of the rarest byte (heuristically chosen) in the // needle. let found = crate::memchr(rare1, &haystack[i..])?; prestate.update(found); i += found; // If we can't align our first match with the haystack, then a // match is impossible. if i < rare1i { i += 1; continue; } // Align our rare2 byte with the haystack. A mismatch means that // a match is impossible. let aligned_rare2i = i - rare1i + rare2i; if haystack.get(aligned_rare2i) != Some(&rare2) { i += 1; continue; } // We've done what we can. There might be a match here. return Some(i - rare1i); } // The only way we get here is if we believe our skipping heuristic // has become ineffective. We're allowed to return false positives, // so return the position at which we advanced to, aligned to the // haystack. Some(i.saturating_sub(rare1i)) } #[cfg(all(test, feature = "std"))] mod tests { use super::*; fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option { let ninfo = NeedleInfo::new(needle); let mut prestate = PrefilterState::new(); find(&mut prestate, &ninfo, haystack, needle) } #[test] fn freqy_forward() { assert_eq!(Some(0), freqy_find(b"BARFOO", b"BAR")); assert_eq!(Some(3), freqy_find(b"FOOBAR", b"BAR")); assert_eq!(Some(0), freqy_find(b"zyzz", b"zyzy")); assert_eq!(Some(2), freqy_find(b"zzzy", b"zyzy")); assert_eq!(None, freqy_find(b"zazb", b"zyzy")); assert_eq!(Some(0), freqy_find(b"yzyy", b"yzyz")); assert_eq!(Some(2), freqy_find(b"yyyz", b"yzyz")); assert_eq!(None, freqy_find(b"yayb", b"yzyz")); } #[test] #[cfg(not(miri))] fn prefilter_permutations() { use crate::memmem::prefilter::tests::PrefilterTest; // SAFETY: super::find is safe to call for all inputs and on all // platforms. 
unsafe { PrefilterTest::run_all_tests(super::find) }; } } memchr-2.4.1/src/memmem/prefilter/genericsimd.rs000064400000000000000000000176440072674642500200460ustar 00000000000000use core::mem::size_of; use crate::memmem::{ prefilter::{PrefilterFnTy, PrefilterState}, vector::Vector, NeedleInfo, }; /// The implementation of the forward vector accelerated candidate finder. /// /// This is inspired by the "generic SIMD" algorithm described here: /// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd /// /// The main difference is that this is just a prefilter. That is, it reports /// candidates once they are seen and doesn't attempt to confirm them. Also, /// the bytes this routine uses to check for candidates are selected based on /// an a priori background frequency distribution. This means that on most /// haystacks, this will on average spend more time in vectorized code than you /// would if you just selected the first and last bytes of the needle. /// /// Note that a non-prefilter variant of this algorithm can be found in the /// parent module, but it only works on smaller needles. /// /// `prestate`, `ninfo`, `haystack` and `needle` are the four prefilter /// function parameters. `fallback` is a prefilter that is used if the haystack /// is too small to be handled with the given vector size. /// /// This routine is not safe because it is intended for callers to specialize /// this with a particular vector (e.g., __m256i) and then call it with the /// relevant target feature (e.g., avx2) enabled. /// /// # Panics /// /// If `needle.len() <= 1`, then this panics. /// /// # Safety /// /// Since this is meant to be used with vector functions, callers need to /// specialize this inside of a function with a `target_feature` attribute. /// Therefore, callers must ensure that whatever target feature is being used /// supports the vector functions that this function is specialized for. (For /// the specific vector functions used, see the Vector trait implementations.) #[inline(always)] pub(crate) unsafe fn find( prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], fallback: PrefilterFnTy, ) -> Option { assert!(needle.len() >= 2, "needle must be at least 2 bytes"); let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_usize(); let min_haystack_len = rare2i + size_of::(); if haystack.len() < min_haystack_len { return fallback(prestate, ninfo, haystack, needle); } let start_ptr = haystack.as_ptr(); let end_ptr = start_ptr.add(haystack.len()); let max_ptr = end_ptr.sub(min_haystack_len); let mut ptr = start_ptr; let rare1chunk = V::splat(needle[rare1i]); let rare2chunk = V::splat(needle[rare2i]); // N.B. I did experiment with unrolling the loop to deal with size(V) // bytes at a time and 2*size(V) bytes at a time. The double unroll // was marginally faster while the quadruple unroll was unambiguously // slower. In the end, I decided the complexity from unrolling wasn't // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to // compare. while ptr <= max_ptr { let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); if let Some(chunki) = m { return Some(matched(prestate, start_ptr, ptr, chunki)); } ptr = ptr.add(size_of::()); } if ptr < end_ptr { // This routine immediately quits if a candidate match is found. // That means that if we're here, no candidate matches have been // found at or before 'ptr'. 
Thus, we don't need to mask anything // out even though we might technically search part of the haystack // that we've already searched (because we know it can't match). ptr = max_ptr; let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); if let Some(chunki) = m { return Some(matched(prestate, start_ptr, ptr, chunki)); } } prestate.update(haystack.len()); None } // Below are two different techniques for checking whether a candidate // match exists in a given chunk or not. find_in_chunk2 checks two bytes // where as find_in_chunk3 checks three bytes. The idea behind checking // three bytes is that while we do a bit more work per iteration, we // decrease the chances of a false positive match being reported and thus // make the search faster overall. This actually works out for the // memmem/krate/prebuilt/huge-en/never-all-common-bytes benchmark, where // using find_in_chunk3 is about 25% faster than find_in_chunk2. However, // it turns out that find_in_chunk2 is faster for all other benchmarks, so // perhaps the extra check isn't worth it in practice. // // For now, we go with find_in_chunk2, but we leave find_in_chunk3 around // to make it easy to switch to and benchmark when possible. /// Search for an occurrence of two rare bytes from the needle in the current /// chunk pointed to by ptr. /// /// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 /// bytes repeated in each 8-bit lane, respectively. /// /// # Safety /// /// It must be safe to do an unaligned read of size(V) bytes starting at both /// (ptr + rare1i) and (ptr + rare2i). #[inline(always)] unsafe fn find_in_chunk2( ptr: *const u8, rare1i: usize, rare2i: usize, rare1chunk: V, rare2chunk: V, ) -> Option { let chunk0 = V::load_unaligned(ptr.add(rare1i)); let chunk1 = V::load_unaligned(ptr.add(rare2i)); let eq0 = chunk0.cmpeq(rare1chunk); let eq1 = chunk1.cmpeq(rare2chunk); let match_offsets = eq0.and(eq1).movemask(); if match_offsets == 0 { return None; } Some(match_offsets.trailing_zeros() as usize) } /// Search for an occurrence of two rare bytes and the first byte (even if one /// of the rare bytes is equivalent to the first byte) from the needle in the /// current chunk pointed to by ptr. /// /// firstchunk, rare1chunk and rare2chunk correspond to vectors with the first, /// rare1 and rare2 bytes repeated in each 8-bit lane, respectively. /// /// # Safety /// /// It must be safe to do an unaligned read of size(V) bytes starting at ptr, /// (ptr + rare1i) and (ptr + rare2i). #[allow(dead_code)] #[inline(always)] unsafe fn find_in_chunk3( ptr: *const u8, rare1i: usize, rare2i: usize, firstchunk: V, rare1chunk: V, rare2chunk: V, ) -> Option { let chunk0 = V::load_unaligned(ptr); let chunk1 = V::load_unaligned(ptr.add(rare1i)); let chunk2 = V::load_unaligned(ptr.add(rare2i)); let eq0 = chunk0.cmpeq(firstchunk); let eq1 = chunk1.cmpeq(rare1chunk); let eq2 = chunk2.cmpeq(rare2chunk); let match_offsets = eq0.and(eq1).and(eq2).movemask(); if match_offsets == 0 { return None; } Some(match_offsets.trailing_zeros() as usize) } /// Accepts a chunk-relative offset and returns a haystack relative offset /// after updating the prefilter state. /// /// Why do we use this unlineable function when a search completes? Well, /// I don't know. Really. Obviously this function was not here initially. /// When doing profiling, the codegen for the inner loop here looked bad and /// I didn't know why. There were a couple extra 'add' instructions and an /// extra 'lea' instruction that I couldn't explain. 
I hypothesized that the /// optimizer was having trouble untangling the hot code in the loop from the /// code that deals with a candidate match. By putting the latter into an /// unlineable function, it kind of forces the issue and it had the intended /// effect: codegen improved measurably. It's good for a ~10% improvement /// across the board on the memmem/krate/prebuilt/huge-en/ benchmarks. #[cold] #[inline(never)] fn matched( prestate: &mut PrefilterState, start_ptr: *const u8, ptr: *const u8, chunki: usize, ) -> usize { let found = diff(ptr, start_ptr) + chunki; prestate.update(found); found } /// Subtract `b` from `a` and return the difference. `a` must be greater than /// or equal to `b`. fn diff(a: *const u8, b: *const u8) -> usize { debug_assert!(a >= b); (a as usize) - (b as usize) } memchr-2.4.1/src/memmem/prefilter/mod.rs000064400000000000000000000540340072674642500163260ustar 00000000000000use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; mod fallback; #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))] mod genericsimd; #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] mod x86; /// The maximum frequency rank permitted for the fallback prefilter. If the /// rarest byte in the needle has a frequency rank above this value, then no /// prefilter is used if the fallback prefilter would otherwise be selected. const MAX_FALLBACK_RANK: usize = 250; /// A combination of prefilter effectiveness state, the prefilter function and /// the needle info required to run a prefilter. /// /// For the most part, these are grouped into a single type for convenience, /// instead of needing to pass around all three as distinct function /// parameters. pub(crate) struct Pre<'a> { /// State that tracks the effectiveness of a prefilter. pub(crate) state: &'a mut PrefilterState, /// The actual prefilter function. pub(crate) prefn: PrefilterFn, /// Information about a needle, such as its RK hash and rare byte offsets. pub(crate) ninfo: &'a NeedleInfo, } impl<'a> Pre<'a> { /// Call this prefilter on the given haystack with the given needle. #[inline(always)] pub(crate) fn call( &mut self, haystack: &[u8], needle: &[u8], ) -> Option { self.prefn.call(self.state, self.ninfo, haystack, needle) } /// Return true if and only if this prefilter should be used. #[inline(always)] pub(crate) fn should_call(&mut self) -> bool { self.state.is_effective() } } /// A prefilter function. /// /// A prefilter function describes both forward and reverse searches. /// (Although, we don't currently implement prefilters for reverse searching.) /// In the case of a forward search, the position returned corresponds to /// the starting offset of a match (confirmed or possible). Its minimum /// value is `0`, and its maximum value is `haystack.len() - 1`. In the case /// of a reverse search, the position returned corresponds to the position /// immediately after a match (confirmed or possible). Its minimum value is `1` /// and its maximum value is `haystack.len()`. /// /// In both cases, the position returned is the starting (or ending) point of a /// _possible_ match. That is, returning a false positive is okay. A prefilter, /// however, must never return any false negatives. That is, if a match exists /// at a particular position `i`, then a prefilter _must_ return that position. /// It cannot skip past it. /// /// # Safety /// /// A prefilter function is not safe to create, since not all prefilters are /// safe to call in all contexts. 
(e.g., A prefilter that uses AVX instructions /// may only be called on x86_64 CPUs with the relevant AVX feature enabled.) /// Thus, callers must ensure that when a prefilter function is created that it /// is safe to call for the current environment. #[derive(Clone, Copy)] pub(crate) struct PrefilterFn(PrefilterFnTy); /// The type of a prefilter function. All prefilters must satisfy this /// signature. /// /// Using a function pointer like this does inhibit inlining, but it does /// eliminate branching and the extra costs associated with copying a larger /// enum. Note also, that using Box can't really work /// here, since we want to work in contexts that don't have dynamic memory /// allocation. Moreover, in the default configuration of this crate on x86_64 /// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter, /// which generally won't be inlineable into the surrounding code anyway. /// (Unless AVX2 is enabled at compile time, but this is typically rare, since /// it produces a non-portable binary.) pub(crate) type PrefilterFnTy = unsafe fn( prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option; impl PrefilterFn { /// Create a new prefilter function from the function pointer given. /// /// # Safety /// /// Callers must ensure that the given prefilter function is safe to call /// for all inputs in the current environment. For example, if the given /// prefilter function uses AVX instructions, then the caller must ensure /// that the appropriate AVX CPU features are enabled. pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn { PrefilterFn(prefn) } /// Call the underlying prefilter function with the given arguments. pub fn call( self, prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option { // SAFETY: Callers have the burden of ensuring that a prefilter // function is safe to call for all inputs in the current environment. unsafe { (self.0)(prestate, ninfo, haystack, needle) } } } impl core::fmt::Debug for PrefilterFn { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { "".fmt(f) } } /// Prefilter controls whether heuristics are used to accelerate searching. /// /// A prefilter refers to the idea of detecting candidate matches very quickly, /// and then confirming whether those candidates are full matches. This /// idea can be quite effective since it's often the case that looking for /// candidates can be a lot faster than running a complete substring search /// over the entire input. Namely, looking for candidates can be done with /// extremely fast vectorized code. /// /// The downside of a prefilter is that it assumes false positives (which are /// candidates generated by a prefilter that aren't matches) are somewhat rare /// relative to the frequency of full matches. That is, if a lot of false /// positives are generated, then it's possible for search time to be worse /// than if the prefilter wasn't enabled in the first place. /// /// Another downside of a prefilter is that it can result in highly variable /// performance, where some cases are extraordinarily fast and others aren't. /// Typically, variable performance isn't a problem, but it may be for your use /// case. /// /// The use of prefilters in this implementation does use a heuristic to detect /// when a prefilter might not be carrying its weight, and will dynamically /// disable its use. 
Nevertheless, this configuration option gives callers /// the ability to disable prefilters if you have knowledge that they won't be /// useful. #[derive(Clone, Copy, Debug)] #[non_exhaustive] pub enum Prefilter { /// Never used a prefilter in substring search. None, /// Automatically detect whether a heuristic prefilter should be used. If /// it is used, then heuristics will be used to dynamically disable the /// prefilter if it is believed to not be carrying its weight. Auto, } impl Default for Prefilter { fn default() -> Prefilter { Prefilter::Auto } } impl Prefilter { pub(crate) fn is_none(&self) -> bool { match *self { Prefilter::None => true, _ => false, } } } /// PrefilterState tracks state associated with the effectiveness of a /// prefilter. It is used to track how many bytes, on average, are skipped by /// the prefilter. If this average dips below a certain threshold over time, /// then the state renders the prefilter inert and stops using it. /// /// A prefilter state should be created for each search. (Where creating an /// iterator is treated as a single search.) A prefilter state should only be /// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert /// `PrefilterState`. #[derive(Clone, Debug)] pub(crate) struct PrefilterState { /// The number of skips that has been executed. This is always 1 greater /// than the actual number of skips. The special sentinel value of 0 /// indicates that the prefilter is inert. This is useful to avoid /// additional checks to determine whether the prefilter is still /// "effective." Once a prefilter becomes inert, it should no longer be /// used (according to our heuristics). skips: u32, /// The total number of bytes that have been skipped. skipped: u32, } impl PrefilterState { /// The minimum number of skip attempts to try before considering whether /// a prefilter is effective or not. const MIN_SKIPS: u32 = 50; /// The minimum amount of bytes that skipping must average. /// /// This value was chosen based on varying it and checking /// the microbenchmarks. In particular, this can impact the /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set /// too low. const MIN_SKIP_BYTES: u32 = 8; /// Create a fresh prefilter state. pub(crate) fn new() -> PrefilterState { PrefilterState { skips: 1, skipped: 0 } } /// Create a fresh prefilter state that is always inert. pub(crate) fn inert() -> PrefilterState { PrefilterState { skips: 0, skipped: 0 } } /// Update this state with the number of bytes skipped on the last /// invocation of the prefilter. #[inline] pub(crate) fn update(&mut self, skipped: usize) { self.skips = self.skips.saturating_add(1); // We need to do this dance since it's technically possible for // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the // size of a prefilter state.) if skipped > core::u32::MAX as usize { self.skipped = core::u32::MAX; } else { self.skipped = self.skipped.saturating_add(skipped as u32); } } /// Return true if and only if this state indicates that a prefilter is /// still effective. #[inline] pub(crate) fn is_effective(&mut self) -> bool { if self.is_inert() { return false; } if self.skips() < PrefilterState::MIN_SKIPS { return true; } if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { return true; } // We're inert. self.skips = 0; false } #[inline] fn is_inert(&self) -> bool { self.skips == 0 } #[inline] fn skips(&self) -> u32 { self.skips.saturating_sub(1) } } /// Determine which prefilter function, if any, to use. 
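// NOTE(editor): The following is an illustrative sketch added for this
// review; it is not part of the original crate. It exercises the
// effectiveness heuristic described above: a prefilter keeps being used
// while it is either "young" (fewer than MIN_SKIPS attempts) or skipping
// enough bytes on average (at least MIN_SKIP_BYTES per attempt); once the
// average drops below that threshold, the state goes inert and stays that
// way. It assumes access to the crate-internal PrefilterState type, e.g. as
// a unit test living in this module.
#[cfg(test)]
mod prefilter_state_sketch {
    use super::PrefilterState;

    #[test]
    fn tiny_skips_eventually_disable_the_prefilter() {
        let mut state = PrefilterState::new();
        // Report a long run of prefilter calls that each skip only one byte.
        // After the minimum number of attempts, the average (1 byte per
        // attempt) is below the threshold, so is_effective must flip to
        // false and remain false.
        for _ in 0..1_000 {
            if !state.is_effective() {
                return;
            }
            state.update(1);
        }
        panic!("expected the prefilter state to go inert");
    }
}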
/// /// This only applies to x86_64 when runtime SIMD detection is enabled (which /// is the default). In general, we try to use an AVX prefilter, followed by /// SSE and then followed by a generic one based on memchr. #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] #[inline(always)] pub(crate) fn forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], ) -> Option<PrefilterFn> { if config.is_none() || needle.len() <= 1 { return None; } #[cfg(feature = "std")] { if cfg!(memchr_runtime_avx) { if is_x86_feature_detected!("avx2") { // SAFETY: x86::avx::find only requires the avx2 feature, // which we've just checked above. return unsafe { Some(PrefilterFn::new(x86::avx::find)) }; } } } if cfg!(memchr_runtime_sse2) { // SAFETY: x86::sse::find only requires the sse2 feature, which is // guaranteed to be available on x86_64. return unsafe { Some(PrefilterFn::new(x86::sse::find)) }; } // Check that our rarest byte has a reasonably low rank. The main issue // here is that the fallback prefilter can perform pretty poorly if it's // given common bytes. So we try to avoid the worst cases here. let (rare1_rank, _) = rare.as_ranks(needle); if rare1_rank <= MAX_FALLBACK_RANK { // SAFETY: fallback::find is safe to call in all environments. return unsafe { Some(PrefilterFn::new(fallback::find)) }; } None } /// Determine which prefilter function, if any, to use. /// /// Since SIMD is currently only supported on x86_64, this will just select /// the fallback prefilter if the rare bytes provided have a low enough rank. #[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))] #[inline(always)] pub(crate) fn forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], ) -> Option<PrefilterFn> { if config.is_none() || needle.len() <= 1 { return None; } let (rare1_rank, _) = rare.as_ranks(needle); if rare1_rank <= MAX_FALLBACK_RANK { // SAFETY: fallback::find is safe to call in all environments. return unsafe { Some(PrefilterFn::new(fallback::find)) }; } None } /// Return the minimum length of the haystack in which a prefilter should be /// used. If the haystack is below this length, then it's probably not worth /// the overhead of running the prefilter. /// /// We used to look at the length of a haystack here. That is, if it was too /// small, then don't bother with the prefilter. But two things changed: /// the prefilter falls back to memchr for small haystacks, and, at the /// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway. /// /// We keep it around for now in case we want to bring it back. #[allow(dead_code)] pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize { // If the haystack length isn't greater than needle.len() * FACTOR, then // no prefilter will be used. The presumption here is that since there // are so few bytes to check, it's not worth running the prefilter since // there will need to be a validation step anyway. Thus, the prefilter is // largely redundant work. // // Increasing the factor noticeably hurts the // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks. const PREFILTER_LENGTH_FACTOR: usize = 2; const VECTOR_MIN_LENGTH: usize = 16; let min = core::cmp::max( VECTOR_MIN_LENGTH, PREFILTER_LENGTH_FACTOR * needle.len(), ); // For haystacks with length==min, we still want to avoid the prefilter, // so add 1.
min + 1 } #[cfg(all(test, feature = "std", not(miri)))] pub(crate) mod tests { use std::convert::{TryFrom, TryInto}; use super::*; use crate::memmem::{ prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes, }; // Below is a small jig that generates prefilter tests. The main purpose // of this jig is to generate tests of varying needle/haystack lengths // in order to try and exercise all code paths in our prefilters. And in // particular, this is especially important for vectorized prefilters where // certain code paths might only be exercised at certain lengths. /// A test that represents the input and expected output to a prefilter /// function. The test should be able to run with any prefilter function /// and get the expected output. pub(crate) struct PrefilterTest { // These fields represent the inputs and expected output of a forwards // prefilter function. pub(crate) ninfo: NeedleInfo, pub(crate) haystack: Vec<u8>, pub(crate) needle: Vec<u8>, pub(crate) output: Option<usize>, } impl PrefilterTest { /// Run all generated forward prefilter tests on the given prefn. /// /// # Safety /// /// Callers must ensure that the given prefilter function pointer is /// safe to call for all inputs in the current environment. pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) { PrefilterTest::run_all_tests_filter(prefn, |_| true) } /// Run all generated forward prefilter tests that pass the given /// predicate on the given prefn. /// /// # Safety /// /// Callers must ensure that the given prefilter function pointer is /// safe to call for all inputs in the current environment. pub(crate) unsafe fn run_all_tests_filter( prefn: PrefilterFnTy, mut predicate: impl FnMut(&PrefilterTest) -> bool, ) { for seed in PREFILTER_TEST_SEEDS { for test in seed.generate() { if predicate(&test) { test.run(prefn); } } } } /// Create a new prefilter test from a seed and some chosen offsets to /// rare bytes in the seed's needle. /// /// If a valid test could not be constructed, then None is returned. /// (Currently, we take the approach of massaging tests to be valid /// instead of rejecting them outright.) fn new( seed: &PrefilterTestSeed, rare1i: usize, rare2i: usize, haystack_len: usize, needle_len: usize, output: Option<usize>, ) -> Option<PrefilterTest> { let mut rare1i: u8 = rare1i.try_into().unwrap(); let mut rare2i: u8 = rare2i.try_into().unwrap(); // The '#' byte is never used in a haystack (unless we're expecting // a match), while the '@' byte is never used in a needle. let mut haystack = vec![b'@'; haystack_len]; let mut needle = vec![b'#'; needle_len]; needle[0] = seed.first; needle[rare1i as usize] = seed.rare1; needle[rare2i as usize] = seed.rare2; // If we're expecting a match, then make sure the needle occurs // in the haystack at the expected position. if let Some(i) = output { haystack[i..i + needle.len()].copy_from_slice(&needle); } // If the operations above lead to rare offsets pointing to the // non-first occurrence of a byte, then adjust it. This might lead // to redundant tests, but it's simpler than trying to change the // generation process I think. if let Some(i) = crate::memchr(seed.rare1, &needle) { rare1i = u8::try_from(i).unwrap(); } if let Some(i) = crate::memchr(seed.rare2, &needle) { rare2i = u8::try_from(i).unwrap(); } let ninfo = NeedleInfo { rarebytes: RareNeedleBytes::new(rare1i, rare2i), nhash: rabinkarp::NeedleHash::forward(&needle), }; Some(PrefilterTest { ninfo, haystack, needle, output }) } /// Run this specific test on the given prefilter function.
If the /// outputs do not match, then this routine panics with a failure /// message. /// /// # Safety /// /// Callers must ensure that the given prefilter function pointer is /// safe to call for all inputs in the current environment. unsafe fn run(&self, prefn: PrefilterFnTy) { let mut prestate = PrefilterState::new(); assert_eq!( self.output, prefn( &mut prestate, &self.ninfo, &self.haystack, &self.needle ), "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}", self.ninfo, self.haystack.len(), std::str::from_utf8(&self.haystack).unwrap(), self.needle.len(), std::str::from_utf8(&self.needle).unwrap(), ); } } /// A set of prefilter test seeds. Each seed serves as the base for the /// generation of many other tests. In essence, the seed captures the /// "rare" and first bytes among our needle. The tests generated from each /// seed essentially vary the length of the needle and haystack, while /// using the rare/first byte configuration from the seed. /// /// The purpose of this is to test many different needle/haystack lengths. /// In particular, some of the vector optimizations might only have bugs /// in haystacks of a certain size. const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[ PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }, PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }, PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }, PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }, PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }, ]; /// Data that describes a single prefilter test seed. struct PrefilterTestSeed { first: u8, rare1: u8, rare2: u8, } impl PrefilterTestSeed { /// Generate a series of prefilter tests from this seed. fn generate(&self) -> Vec<PrefilterTest> { let mut tests = vec![]; let mut push = |test: Option<PrefilterTest>| { if let Some(test) = test { tests.push(test); } }; let len_start = 2; // The loop below generates *a lot* of tests. The number of tests // was chosen somewhat empirically to be "bearable" when running // the test suite. for needle_len in len_start..=40 { let rare_start = len_start - 1; for rare1i in rare_start..needle_len { for rare2i in rare1i..needle_len { for haystack_len in needle_len..=66 { push(PrefilterTest::new( self, rare1i, rare2i, haystack_len, needle_len, None, )); // Test all possible match scenarios for this // needle and haystack. for output in 0..=(haystack_len - needle_len) { push(PrefilterTest::new( self, rare1i, rare2i, haystack_len, needle_len, Some(output), )); } } } } } tests } } } memchr-2.4.1/src/memmem/prefilter/x86/avx.rs000064400000000000000000000022230072674642500167630ustar 00000000000000use core::arch::x86_64::__m256i; use crate::memmem::{ prefilter::{PrefilterFnTy, PrefilterState}, NeedleInfo, }; // Check that the functions below satisfy the Prefilter function type. const _: PrefilterFnTy = find; /// An AVX2 accelerated candidate finder for single-substring search. /// /// # Safety /// /// Callers must ensure that the avx2 CPU feature is enabled in the current /// environment.
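// NOTE(editor): The following is an illustrative sketch added for this
// review; it is not part of the original crate. It shows the pattern that
// the safety contract above relies on: a routine compiled with
// #[target_feature] may only be reached through a matching runtime CPU
// feature check, and that check is what justifies the `unsafe` call. The
// names below (count_eq_avx2, count_eq) are hypothetical stand-ins for the
// crate's real AVX2 prefilter plumbing.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
mod target_feature_sketch {
    #[target_feature(enable = "avx2")]
    unsafe fn count_eq_avx2(haystack: &[u8], byte: u8) -> usize {
        // A real routine would use core::arch::x86_64 intrinsics here; a
        // scalar body keeps the sketch short while the safety story (only
        // callable once AVX2 support has been confirmed) stays the same.
        haystack.iter().filter(|&&b| b == byte).count()
    }

    #[allow(dead_code)]
    fn count_eq(haystack: &[u8], byte: u8) -> usize {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: this branch is only taken when the CPU reports AVX2
            // support at runtime, which is the precondition of the call.
            return unsafe { count_eq_avx2(haystack, byte) };
        }
        haystack.iter().filter(|&&b| b == byte).count()
    }
}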
#[target_feature(enable = "avx2")] pub(crate) unsafe fn find( prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option { super::super::genericsimd::find::<__m256i>( prestate, ninfo, haystack, needle, super::sse::find, ) } #[cfg(test)] mod tests { #[test] #[cfg(not(miri))] fn prefilter_permutations() { use crate::memmem::prefilter::tests::PrefilterTest; if !is_x86_feature_detected!("avx2") { return; } // SAFETY: The safety of super::find only requires that the current // CPU support AVX2, which we checked above. unsafe { PrefilterTest::run_all_tests(super::find) }; } } memchr-2.4.1/src/memmem/prefilter/x86/mod.rs000064400000000000000000000002400072674642500167410ustar 00000000000000// We only use AVX when we can detect at runtime whether it's available, which // requires std. #[cfg(feature = "std")] pub(crate) mod avx; pub(crate) mod sse; memchr-2.4.1/src/memmem/prefilter/x86/sse.rs000064400000000000000000000032630072674642500167640ustar 00000000000000use core::arch::x86_64::__m128i; use crate::memmem::{ prefilter::{PrefilterFnTy, PrefilterState}, NeedleInfo, }; // Check that the functions below satisfy the Prefilter function type. const _: PrefilterFnTy = find; /// An SSE2 accelerated candidate finder for single-substring search. /// /// # Safety /// /// Callers must ensure that the sse2 CPU feature is enabled in the current /// environment. This feature should be enabled in all x86_64 targets. #[target_feature(enable = "sse2")] pub(crate) unsafe fn find( prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option { // If the haystack is too small for SSE2, then just run memchr on the // rarest byte and be done with it. (It is likely that this code path is // rarely exercised, since a higher level routine will probably dispatch to // Rabin-Karp for such a small haystack.) fn simple_memchr_fallback( _prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option { let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize(); crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare)) } super::super::genericsimd::find::<__m128i>( prestate, ninfo, haystack, needle, simple_memchr_fallback, ) } #[cfg(all(test, feature = "std"))] mod tests { #[test] #[cfg(not(miri))] fn prefilter_permutations() { use crate::memmem::prefilter::tests::PrefilterTest; // SAFETY: super::find is safe to call for all inputs on x86. unsafe { PrefilterTest::run_all_tests(super::find) }; } } memchr-2.4.1/src/memmem/rabinkarp.rs000064400000000000000000000201730072674642500155210ustar 00000000000000/* This module implements the classical Rabin-Karp substring search algorithm, with no extra frills. While its use would seem to break our time complexity guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only ever use RK on a constant subset of haystacks. The main point here is that RK has good latency properties for small needles/haystacks. It's very quick to compute a needle hash and zip through the haystack when compared to initializing Two-Way, for example. And this is especially useful for cases where the haystack is just too short for vector instructions to do much good. The hashing function used here is the same one recommended by ESMAJ. Another choice instead of Rabin-Karp would be Shift-Or. But its latency isn't quite as good since its preprocessing time is a bit more expensive (both in practice and in theory). However, perhaps Shift-Or has a place somewhere else for short patterns. 
I think the main problem is that it requires space proportional to the alphabet and the needle. If we, for example, supported needles up to length 16, then the total table size would be len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's probably bad to put that on the stack. So ideally, we'd throw it on the heap, but we'd really like to write as much code without using alloc/std as possible. But maybe it's worth the special casing. It's a TODO to benchmark. Wikipedia has a decent explanation, if a bit heavy on the theory: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm But ESMAJ provides something a bit more concrete: http://www-igm.univ-mlv.fr/~lecroq/string/node5.html Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs */ /// Whether RK is believed to be very fast for the given needle/haystack. pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { haystack.len() < 16 } /// Search for the first occurrence of needle in haystack using Rabin-Karp. pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { find_with(&NeedleHash::forward(needle), haystack, needle) } /// Search for the first occurrence of needle in haystack using Rabin-Karp with /// a pre-computed needle hash. pub(crate) fn find_with( nhash: &NeedleHash, mut haystack: &[u8], needle: &[u8], ) -> Option<usize> { if haystack.len() < needle.len() { return None; } let start = haystack.as_ptr() as usize; let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]); // N.B. I've experimented with unrolling this loop, but couldn't realize // any obvious gains. loop { if nhash.eq(hash) && is_prefix(haystack, needle) { return Some(haystack.as_ptr() as usize - start); } if needle.len() >= haystack.len() { return None; } hash.roll(&nhash, haystack[0], haystack[needle.len()]); haystack = &haystack[1..]; } } /// Search for the last occurrence of needle in haystack using Rabin-Karp. pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { rfind_with(&NeedleHash::reverse(needle), haystack, needle) } /// Search for the last occurrence of needle in haystack using Rabin-Karp with /// a pre-computed needle hash. pub(crate) fn rfind_with( nhash: &NeedleHash, mut haystack: &[u8], needle: &[u8], ) -> Option<usize> { if haystack.len() < needle.len() { return None; } let mut hash = Hash::from_bytes_rev(&haystack[haystack.len() - needle.len()..]); loop { if nhash.eq(hash) && is_suffix(haystack, needle) { return Some(haystack.len() - needle.len()); } if needle.len() >= haystack.len() { return None; } hash.roll( &nhash, haystack[haystack.len() - 1], haystack[haystack.len() - needle.len() - 1], ); haystack = &haystack[..haystack.len() - 1]; } } /// A hash derived from a needle. #[derive(Clone, Copy, Debug, Default)] pub(crate) struct NeedleHash { /// The actual hash. hash: Hash, /// The factor needed to multiply a byte by in order to subtract it from /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), /// where n is the length of the needle. This is how we "remove" a byte /// from the hash once the hash window rolls past it. hash_2pow: u32, } impl NeedleHash { /// Create a new Rabin-Karp hash for the given needle for use in forward /// searching.
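// NOTE(editor): The following is an illustrative sketch added for this
// review; it is not part of the original crate. It checks the rolling
// property described above: subtracting the outgoing byte scaled by
// 2^(n-1) and then pushing the incoming byte yields exactly the hash of the
// new window. It assumes access to the crate-internal Hash and NeedleHash
// types, e.g. as a unit test living in this module.
#[cfg(test)]
mod rolling_hash_sketch {
    use super::{Hash, NeedleHash};

    #[test]
    fn rolling_matches_hashing_from_scratch() {
        let needle = b"abc";
        let nhash = NeedleHash::forward(needle);
        let haystack = b"zabcab";
        // Hash the first window, then roll it one byte at a time, comparing
        // against a hash recomputed from scratch at each position.
        let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]);
        for i in 0..haystack.len() - needle.len() {
            hash.roll(&nhash, haystack[i], haystack[i + needle.len()]);
            let from_scratch =
                Hash::from_bytes_fwd(&haystack[i + 1..i + 1 + needle.len()]);
            assert_eq!(from_scratch, hash);
        }
    }
}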
pub(crate) fn forward(needle: &[u8]) -> NeedleHash { let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; if needle.is_empty() { return nh; } nh.hash.add(needle[0]); for &b in needle.iter().skip(1) { nh.hash.add(b); nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); } nh } /// Create a new Rabin-Karp hash for the given needle for use in reverse /// searching. pub(crate) fn reverse(needle: &[u8]) -> NeedleHash { let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; if needle.is_empty() { return nh; } nh.hash.add(needle[needle.len() - 1]); for &b in needle.iter().rev().skip(1) { nh.hash.add(b); nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); } nh } /// Return true if the hashes are equivalent. fn eq(&self, hash: Hash) -> bool { self.hash == hash } } /// A Rabin-Karp hash. This might represent the hash of a needle, or the hash /// of a rolling window in the haystack. #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub(crate) struct Hash(u32); impl Hash { /// Create a new hash that represents the empty string. pub(crate) fn new() -> Hash { Hash(0) } /// Create a new hash from the bytes given for use in forward searches. pub(crate) fn from_bytes_fwd(bytes: &[u8]) -> Hash { let mut hash = Hash::new(); for &b in bytes { hash.add(b); } hash } /// Create a new hash from the bytes given for use in reverse searches. fn from_bytes_rev(bytes: &[u8]) -> Hash { let mut hash = Hash::new(); for &b in bytes.iter().rev() { hash.add(b); } hash } /// Add 'new' and remove 'old' from this hash. The given needle hash should /// correspond to the hash computed for the needle being searched for. /// /// This is meant to be used when the rolling window of the haystack is /// advanced. fn roll(&mut self, nhash: &NeedleHash, old: u8, new: u8) { self.del(nhash, old); self.add(new); } /// Add a byte to this hash. fn add(&mut self, byte: u8) { self.0 = self.0.wrapping_shl(1).wrapping_add(byte as u32); } /// Remove a byte from this hash. The given needle hash should correspond /// to the hash computed for the needle being searched for. fn del(&mut self, nhash: &NeedleHash, byte: u8) { let factor = nhash.hash_2pow; self.0 = self.0.wrapping_sub((byte as u32).wrapping_mul(factor)); } } /// Returns true if the given needle is a prefix of the given haystack. /// /// We forcefully don't inline the is_prefix call and hint at the compiler that /// it is unlikely to be called. This causes the inner rabinkarp loop above /// to be a bit tighter and leads to some performance improvement. See the /// memmem/krate/prebuilt/sliceslice-words/words benchmark. #[cold] #[inline(never)] fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { crate::memmem::util::is_prefix(haystack, needle) } /// Returns true if the given needle is a suffix of the given haystack. /// /// See is_prefix for why this is forcefully not inlined. #[cold] #[inline(never)] fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { crate::memmem::util::is_suffix(haystack, needle) } #[cfg(test)] mod simpletests { define_memmem_simple_tests!(super::find, super::rfind); } #[cfg(all(test, feature = "std", not(miri)))] mod proptests { define_memmem_quickcheck_tests!(super::find, super::rfind); } memchr-2.4.1/src/memmem/rarebytes.rs000064400000000000000000000141000072674642500155410ustar 00000000000000/// A heuristic frequency based detection of rare bytes for substring search. /// /// This detector attempts to pick out two bytes in a needle that are predicted /// to occur least frequently. 
The purpose is to use these bytes to implement /// fast candidate search using vectorized code. /// /// A set of offsets is only computed for needles of length 2 or greater. /// Smaller needles should be special cased by the substring search algorithm /// in use. (e.g., Use memchr for single byte needles.) /// /// Note that we use `u8` to represent the offsets of the rare bytes in a /// needle to reduce space usage. This means that rare bytes occurring after the /// first 255 bytes in a needle will never be used. #[derive(Clone, Copy, Debug, Default)] pub(crate) struct RareNeedleBytes { /// The leftmost offset of the rarest byte in the needle, according to /// pre-computed frequency analysis. The "leftmost offset" means that /// rare1i <= i for all i where needle[i] == needle[rare1i]. rare1i: u8, /// The leftmost offset of the second rarest byte in the needle, according /// to pre-computed frequency analysis. The "leftmost offset" means that /// rare2i <= i for all i where needle[i] == needle[rare2i]. /// /// The second rarest byte is used as a type of guard for quickly detecting /// a mismatch if the first byte matches. This is a hedge against /// pathological cases where the pre-computed frequency analysis may be /// off. (But of course, does not prevent *all* pathological cases.) /// /// In general, rare1i != rare2i by construction, although there is no hard /// requirement that they be different. However, since the case of a single /// byte needle is handled specially by memchr itself, rare2i generally /// always should be different from rare1i since it would otherwise be /// ineffective as a guard. rare2i: u8, } impl RareNeedleBytes { /// Create a new pair of rare needle bytes with the given offsets. This is /// only used in tests for generating input data. #[cfg(all(test, feature = "std"))] pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes { RareNeedleBytes { rare1i, rare2i } } /// Detect the leftmost offsets of the two rarest bytes in the given /// needle. pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { // For needles bigger than u8::MAX, our offsets aren't big enough. // (We make our offsets small to reduce stack copying.) // If you have a use case for it, please file an issue. In that // case, we should probably just adjust the routine below to pick // some rare bytes from the first 255 bytes of the needle. // // Also note that for needles of size 0 or 1, they are special // cased in Two-Way. // // TODO: Benchmark this. return RareNeedleBytes { rare1i: 0, rare2i: 0 }; } // Find the rarest two bytes. We make them distinct by construction. let (mut rare1, mut rare1i) = (needle[0], 0); let (mut rare2, mut rare2i) = (needle[1], 1); if rank(rare2) < rank(rare1) { core::mem::swap(&mut rare1, &mut rare2); core::mem::swap(&mut rare1i, &mut rare2i); } for (i, &b) in needle.iter().enumerate().skip(2) { if rank(b) < rank(rare1) { rare2 = rare1; rare2i = rare1i; rare1 = b; rare1i = i as u8; } else if b != rare1 && rank(b) < rank(rare2) { rare2 = b; rare2i = i as u8; } } // While not strictly required, we really don't want these to be // equivalent. If they were, it would reduce the effectiveness of // candidate searching using these rare bytes by increasing the rate of // false positives. assert_ne!(rare1i, rare2i); RareNeedleBytes { rare1i, rare2i } } /// Return the rare bytes in the given needle in the forward direction. /// The needle given must be the same one given to the RareNeedleBytes /// constructor.
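// NOTE(editor): The following is an illustrative sketch added for this
// review; it is not part of the original crate. It shows, in scalar form,
// how the two rare byte offsets described above are intended to be used by
// a candidate search: find the rarest byte at its offset with memchr, then
// use the second rarest byte at its own offset as a cheap guard before
// reporting a candidate. (The vectorized prefilters apply the same idea a
// whole chunk at a time.) The function name is hypothetical, and false
// positives are fine per the prefilter contract.
#[allow(dead_code)]
fn rare_byte_candidate_sketch(
    haystack: &[u8],
    needle: &[u8],
    rare1i: usize,
    rare2i: usize,
) -> Option<usize> {
    debug_assert!(rare1i < needle.len() && rare2i < needle.len());
    let (rare1, rare2) = (needle[rare1i], needle[rare2i]);
    let mut at = 0;
    while at + needle.len() <= haystack.len() {
        // Find the next occurrence of the rarest byte at or after the
        // position it would occupy in a match starting at `at`.
        let i = crate::memchr(rare1, &haystack[at + rare1i..])?;
        // The candidate start implied by where the rare byte was found.
        let start = at + i;
        if start + needle.len() > haystack.len() {
            return None;
        }
        // Guard: the second rarest byte must also line up at its offset.
        if haystack[start + rare2i] == rare2 {
            return Some(start);
        }
        at = start + 1;
    }
    None
}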
pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) { (needle[self.rare1i as usize], needle[self.rare2i as usize]) } /// Return the rare offsets such that the first offset is always <= to the /// second offset. This is useful when the caller doesn't care whether /// rare1 is rarer than rare2, but just wants to ensure that they are /// ordered with respect to one another. #[cfg(memchr_runtime_simd)] pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) { let (rare1i, rare2i) = self.as_rare_ordered_u8(); (rare1i as usize, rare2i as usize) } /// Like as_rare_ordered_usize, but returns the offsets as their native /// u8 values. #[cfg(memchr_runtime_simd)] pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) { if self.rare1i <= self.rare2i { (self.rare1i, self.rare2i) } else { (self.rare2i, self.rare1i) } } /// Return the rare offsets as usize values in the order in which they were /// constructed. rare1, for example, is constructed as the "rarer" byte, /// and thus, callers may want to treat it differently from rare2. pub(crate) fn as_rare_usize(&self) -> (usize, usize) { (self.rare1i as usize, self.rare2i as usize) } /// Return the byte frequency rank of each byte. The higher the rank, the /// more frequency the byte is predicted to be. The needle given must be /// the same one given to the RareNeedleBytes constructor. pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { let (b1, b2) = self.as_rare_bytes(needle); (rank(b1), rank(b2)) } } /// Return the heuristical frequency rank of the given byte. A lower rank /// means the byte is believed to occur less frequently. fn rank(b: u8) -> usize { crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize } memchr-2.4.1/src/memmem/twoway.rs000064400000000000000000000774100072674642500151100ustar 00000000000000use core::cmp; use crate::memmem::{prefilter::Pre, util}; /// Two-Way search in the forward direction. #[derive(Clone, Copy, Debug)] pub(crate) struct Forward(TwoWay); /// Two-Way search in the reverse direction. #[derive(Clone, Copy, Debug)] pub(crate) struct Reverse(TwoWay); /// An implementation of the TwoWay substring search algorithm, with heuristics /// for accelerating search based on frequency analysis. /// /// This searcher supports forward and reverse search, although not /// simultaneously. It runs in O(n + m) time and O(1) space, where /// `n ~ len(needle)` and `m ~ len(haystack)`. /// /// The implementation here roughly matches that which was developed by /// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The /// changes in this implementation are 1) the use of zero-based indices, 2) a /// heuristic skip table based on the last byte (borrowed from Rust's standard /// library) and 3) the addition of heuristics for a fast skip loop. That is, /// (3) this will detect bytes that are believed to be rare in the needle and /// use fast vectorized instructions to find their occurrences quickly. The /// Two-Way algorithm is then used to confirm whether a match at that location /// occurred. /// /// The heuristic for fast skipping is automatically shut off if it's /// detected to be ineffective at search time. Generally, this only occurs in /// pathological cases. But this is generally necessary in order to preserve /// a `O(n + m)` time bound. /// /// The code below is fairly complex and not obviously correct at all. It's /// likely necessary to read the Two-Way paper cited above in order to fully /// grok this code. 
The essence of it is: /// /// 1) Do something to detect a "critical" position in the needle. /// 2) For the current position in the haystack, look if needle[critical..] /// matches at that position. /// 3) If so, look if needle[..critical] matches. /// 4) If a mismatch occurs, shift the search by some amount based on the /// critical position and a pre-computed shift. /// /// This type is wrapped in Forward and Reverse types that expose consistent /// forward or reverse APIs. #[derive(Clone, Copy, Debug)] struct TwoWay { /// A small bitset used as a quick prefilter (in addition to the faster /// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i /// for any b in the needle. /// /// When used as a prefilter, if the last byte at the current candidate /// position is NOT in this set, then we can skip that entire candidate /// position (the length of the needle). This is essentially the shift /// trick found in Boyer-Moore, but only applied to bytes that don't appear /// in the needle. /// /// N.B. This trick was inspired by something similar in std's /// implementation of Two-Way. byteset: ApproximateByteSet, /// A critical position in needle. Specifically, this position corresponds /// to beginning of either the minimal or maximal suffix in needle. (N.B. /// See SuffixType below for why "minimal" isn't quite the correct word /// here.) /// /// This is the position at which every search begins. Namely, search /// starts by scanning text to the right of this position, and only if /// there's a match does the text to the left of this position get scanned. critical_pos: usize, /// The amount we shift by in the Two-Way search algorithm. This /// corresponds to the "small period" and "large period" cases. shift: Shift, } impl Forward { /// Create a searcher that uses the Two-Way algorithm by searching forwards /// through any haystack. pub(crate) fn new(needle: &[u8]) -> Forward { if needle.is_empty() { return Forward(TwoWay::empty()); } let byteset = ApproximateByteSet::new(needle); let min_suffix = Suffix::forward(needle, SuffixKind::Minimal); let max_suffix = Suffix::forward(needle, SuffixKind::Maximal); let (period_lower_bound, critical_pos) = if min_suffix.pos > max_suffix.pos { (min_suffix.period, min_suffix.pos) } else { (max_suffix.period, max_suffix.pos) }; let shift = Shift::forward(needle, period_lower_bound, critical_pos); Forward(TwoWay { byteset, critical_pos, shift }) } /// Find the position of the first occurrence of this searcher's needle in /// the given haystack. If one does not exist, then return None. /// /// This accepts prefilter state that is useful when using the same /// searcher multiple times, such as in an iterator. /// /// Callers must guarantee that the needle is non-empty and its length is /// <= the haystack's length. #[inline(always)] pub(crate) fn find( &self, pre: Option<&mut Pre<'_>>, haystack: &[u8], needle: &[u8], ) -> Option { debug_assert!(!needle.is_empty(), "needle should not be empty"); debug_assert!(needle.len() <= haystack.len(), "haystack too short"); match self.0.shift { Shift::Small { period } => { self.find_small_imp(pre, haystack, needle, period) } Shift::Large { shift } => { self.find_large_imp(pre, haystack, needle, shift) } } } /// Like find, but handles the degenerate substring test cases. This is /// only useful for conveniently testing this substring implementation in /// isolation. 
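// NOTE(editor): The following is an illustrative sketch added for this
// review; it is not part of the original crate. It isolates the approximate
// byte set trick described above: a 64-bit mask with bit b%64 set for every
// byte b in the needle. If the last byte of the current window is not in
// the set, then that byte occurs nowhere in the needle, so no match can
// start anywhere in the window and the search may jump ahead by the whole
// needle length. (A "maybe in the needle" answer can be wrong because two
// distinct bytes can share a bit, but a "not in the needle" answer cannot.)
#[allow(dead_code)]
fn byteset_skip_sketch(haystack: &[u8], needle: &[u8]) -> usize {
    debug_assert!(!needle.is_empty());
    // Build the approximate set, the same way ApproximateByteSet::new does.
    let mut bits = 0u64;
    for &b in needle {
        bits |= 1 << (b % 64);
    }
    // Return how far the skip trick alone advances before reaching a window
    // whose last byte might belong to the needle.
    let mut pos = 0;
    while pos + needle.len() <= haystack.len() {
        let last = haystack[pos + needle.len() - 1];
        if bits & (1 << (last % 64)) != 0 {
            break;
        }
        // `last` is definitely not a needle byte, so no match can start at
        // any position in pos..pos + needle.len().
        pos += needle.len();
    }
    pos
}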
#[cfg(test)] fn find_general( &self, pre: Option<&mut Pre<'_>>, haystack: &[u8], needle: &[u8], ) -> Option { if needle.is_empty() { Some(0) } else if haystack.len() < needle.len() { None } else { self.find(pre, haystack, needle) } } // Each of the two search implementations below can be accelerated by a // prefilter, but it is not always enabled. To avoid its overhead when // its disabled, we explicitly inline each search implementation based on // whether a prefilter will be used or not. The decision on which to use // is made in the parent meta searcher. #[inline(always)] fn find_small_imp( &self, mut pre: Option<&mut Pre<'_>>, haystack: &[u8], needle: &[u8], period: usize, ) -> Option { let last_byte = needle.len() - 1; let mut pos = 0; let mut shift = 0; while pos + needle.len() <= haystack.len() { let mut i = cmp::max(self.0.critical_pos, shift); if let Some(pre) = pre.as_mut() { if pre.should_call() { pos += pre.call(&haystack[pos..], needle)?; shift = 0; i = self.0.critical_pos; if pos + needle.len() > haystack.len() { return None; } } } if !self.0.byteset.contains(haystack[pos + last_byte]) { pos += needle.len(); shift = 0; continue; } while i < needle.len() && needle[i] == haystack[pos + i] { i += 1; } if i < needle.len() { pos += i - self.0.critical_pos + 1; shift = 0; } else { let mut j = self.0.critical_pos; while j > shift && needle[j] == haystack[pos + j] { j -= 1; } if j <= shift && needle[shift] == haystack[pos + shift] { return Some(pos); } pos += period; shift = needle.len() - period; } } None } #[inline(always)] fn find_large_imp( &self, mut pre: Option<&mut Pre<'_>>, haystack: &[u8], needle: &[u8], shift: usize, ) -> Option { let last_byte = needle.len() - 1; let mut pos = 0; 'outer: while pos + needle.len() <= haystack.len() { if let Some(pre) = pre.as_mut() { if pre.should_call() { pos += pre.call(&haystack[pos..], needle)?; if pos + needle.len() > haystack.len() { return None; } } } if !self.0.byteset.contains(haystack[pos + last_byte]) { pos += needle.len(); continue; } let mut i = self.0.critical_pos; while i < needle.len() && needle[i] == haystack[pos + i] { i += 1; } if i < needle.len() { pos += i - self.0.critical_pos + 1; } else { for j in (0..self.0.critical_pos).rev() { if needle[j] != haystack[pos + j] { pos += shift; continue 'outer; } } return Some(pos); } } None } } impl Reverse { /// Create a searcher that uses the Two-Way algorithm by searching in /// reverse through any haystack. pub(crate) fn new(needle: &[u8]) -> Reverse { if needle.is_empty() { return Reverse(TwoWay::empty()); } let byteset = ApproximateByteSet::new(needle); let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); let (period_lower_bound, critical_pos) = if min_suffix.pos < max_suffix.pos { (min_suffix.period, min_suffix.pos) } else { (max_suffix.period, max_suffix.pos) }; // let critical_pos = needle.len() - critical_pos; let shift = Shift::reverse(needle, period_lower_bound, critical_pos); Reverse(TwoWay { byteset, critical_pos, shift }) } /// Find the position of the last occurrence of this searcher's needle /// in the given haystack. If one does not exist, then return None. /// /// This will automatically initialize prefilter state. This should only /// be used for one-off searches. /// /// Callers must guarantee that the needle is non-empty and its length is /// <= the haystack's length. 
#[inline(always)] pub(crate) fn rfind( &self, haystack: &[u8], needle: &[u8], ) -> Option { debug_assert!(!needle.is_empty(), "needle should not be empty"); debug_assert!(needle.len() <= haystack.len(), "haystack too short"); // For the reverse case, we don't use a prefilter. It's plausible that // perhaps we should, but it's a lot of additional code to do it, and // it's not clear that it's actually worth it. If you have a really // compelling use case for this, please file an issue. match self.0.shift { Shift::Small { period } => { self.rfind_small_imp(haystack, needle, period) } Shift::Large { shift } => { self.rfind_large_imp(haystack, needle, shift) } } } /// Like rfind, but handles the degenerate substring test cases. This is /// only useful for conveniently testing this substring implementation in /// isolation. #[cfg(test)] fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option { if needle.is_empty() { Some(haystack.len()) } else if haystack.len() < needle.len() { None } else { self.rfind(haystack, needle) } } #[inline(always)] fn rfind_small_imp( &self, haystack: &[u8], needle: &[u8], period: usize, ) -> Option { let nlen = needle.len(); let mut pos = haystack.len(); let mut shift = nlen; while pos >= nlen { if !self.0.byteset.contains(haystack[pos - nlen]) { pos -= nlen; shift = nlen; continue; } let mut i = cmp::min(self.0.critical_pos, shift); while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { i -= 1; } if i > 0 || needle[0] != haystack[pos - nlen] { pos -= self.0.critical_pos - i + 1; shift = nlen; } else { let mut j = self.0.critical_pos; while j < shift && needle[j] == haystack[pos - nlen + j] { j += 1; } if j >= shift { return Some(pos - nlen); } pos -= period; shift = period; } } None } #[inline(always)] fn rfind_large_imp( &self, haystack: &[u8], needle: &[u8], shift: usize, ) -> Option { let nlen = needle.len(); let mut pos = haystack.len(); while pos >= nlen { if !self.0.byteset.contains(haystack[pos - nlen]) { pos -= nlen; continue; } let mut i = self.0.critical_pos; while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { i -= 1; } if i > 0 || needle[0] != haystack[pos - nlen] { pos -= self.0.critical_pos - i + 1; } else { let mut j = self.0.critical_pos; while j < nlen && needle[j] == haystack[pos - nlen + j] { j += 1; } if j == nlen { return Some(pos - nlen); } pos -= shift; } } None } } impl TwoWay { fn empty() -> TwoWay { TwoWay { byteset: ApproximateByteSet::new(b""), critical_pos: 0, shift: Shift::Large { shift: 0 }, } } } /// A representation of the amount we're allowed to shift by during Two-Way /// search. /// /// When computing a critical factorization of the needle, we find the position /// of the critical factorization by finding the needle's maximal (or minimal) /// suffix, along with the period of that suffix. It turns out that the period /// of that suffix is a lower bound on the period of the needle itself. /// /// This lower bound is equivalent to the actual period of the needle in /// some cases. To describe that case, we denote the needle as `x` where /// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower /// bound given here is always the period of `v`, which is `<= period(x)`. The /// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and /// where `u` is a suffix of `v[0..period(v)]`. /// /// This case is important because the search algorithm for when the /// periods are equivalent is slightly different than the search algorithm /// for when the periods are not equivalent. 
In particular, when they aren't /// equivalent, we know that the period of the needle is no less than half its /// length. In this case, we shift by an amount less than or equal to the /// period of the needle (determined by the maximum length of the components /// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).. /// /// The above two cases are represented by the variants below. Each entails /// a different instantiation of the Two-Way search algorithm. /// /// N.B. If we could find a way to compute the exact period in all cases, /// then we could collapse this case analysis and simplify the algorithm. The /// Two-Way paper suggests this is possible, but more reading is required to /// grok why the authors didn't pursue that path. #[derive(Clone, Copy, Debug)] enum Shift { Small { period: usize }, Large { shift: usize }, } impl Shift { /// Compute the shift for a given needle in the forward direction. /// /// This requires a lower bound on the period and a critical position. /// These can be computed by extracting both the minimal and maximal /// lexicographic suffixes, and choosing the right-most starting position. /// The lower bound on the period is then the period of the chosen suffix. fn forward( needle: &[u8], period_lower_bound: usize, critical_pos: usize, ) -> Shift { let large = cmp::max(critical_pos, needle.len() - critical_pos); if critical_pos * 2 >= needle.len() { return Shift::Large { shift: large }; } let (u, v) = needle.split_at(critical_pos); if !util::is_suffix(&v[..period_lower_bound], u) { return Shift::Large { shift: large }; } Shift::Small { period: period_lower_bound } } /// Compute the shift for a given needle in the reverse direction. /// /// This requires a lower bound on the period and a critical position. /// These can be computed by extracting both the minimal and maximal /// lexicographic suffixes, and choosing the left-most starting position. /// The lower bound on the period is then the period of the chosen suffix. fn reverse( needle: &[u8], period_lower_bound: usize, critical_pos: usize, ) -> Shift { let large = cmp::max(critical_pos, needle.len() - critical_pos); if (needle.len() - critical_pos) * 2 >= needle.len() { return Shift::Large { shift: large }; } let (v, u) = needle.split_at(critical_pos); if !util::is_prefix(&v[v.len() - period_lower_bound..], u) { return Shift::Large { shift: large }; } Shift::Small { period: period_lower_bound } } } /// A suffix extracted from a needle along with its period. #[derive(Debug)] struct Suffix { /// The starting position of this suffix. /// /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for /// forward suffixes, this is an inclusive starting position, where as for /// reverse suffixes, this is an exclusive ending position. pos: usize, /// The period of this suffix. /// /// Note that this is NOT necessarily the period of the string from which /// this suffix comes from. (It is always less than or equal to the period /// of the original string.) period: usize, } impl Suffix { fn forward(needle: &[u8], kind: SuffixKind) -> Suffix { debug_assert!(!needle.is_empty()); // suffix represents our maximal (or minimal) suffix, along with // its period. let mut suffix = Suffix { pos: 0, period: 1 }; // The start of a suffix in `needle` that we are considering as a // more maximal (or minimal) suffix than what's in `suffix`. let mut candidate_start = 1; // The current offset of our suffixes that we're comparing. 
// // When the characters at this offset are the same, then we mush on // to the next position since no decision is possible. When the // candidate's character is greater (or lesser) than the corresponding // character than our current maximal (or minimal) suffix, then the // current suffix is changed over to the candidate and we restart our // search. Otherwise, the candidate suffix is no good and we restart // our search on the next candidate. // // The three cases above correspond to the three cases in the loop // below. let mut offset = 0; while candidate_start + offset < needle.len() { let current = needle[suffix.pos + offset]; let candidate = needle[candidate_start + offset]; match kind.cmp(current, candidate) { SuffixOrdering::Accept => { suffix = Suffix { pos: candidate_start, period: 1 }; candidate_start += 1; offset = 0; } SuffixOrdering::Skip => { candidate_start += offset + 1; offset = 0; suffix.period = candidate_start - suffix.pos; } SuffixOrdering::Push => { if offset + 1 == suffix.period { candidate_start += suffix.period; offset = 0; } else { offset += 1; } } } } suffix } fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix { debug_assert!(!needle.is_empty()); // See the comments in `forward` for how this works. let mut suffix = Suffix { pos: needle.len(), period: 1 }; if needle.len() == 1 { return suffix; } let mut candidate_start = needle.len() - 1; let mut offset = 0; while offset < candidate_start { let current = needle[suffix.pos - offset - 1]; let candidate = needle[candidate_start - offset - 1]; match kind.cmp(current, candidate) { SuffixOrdering::Accept => { suffix = Suffix { pos: candidate_start, period: 1 }; candidate_start -= 1; offset = 0; } SuffixOrdering::Skip => { candidate_start -= offset + 1; offset = 0; suffix.period = suffix.pos - candidate_start; } SuffixOrdering::Push => { if offset + 1 == suffix.period { candidate_start -= suffix.period; offset = 0; } else { offset += 1; } } } } suffix } } /// The kind of suffix to extract. #[derive(Clone, Copy, Debug)] enum SuffixKind { /// Extract the smallest lexicographic suffix from a string. /// /// Technically, this doesn't actually pick the smallest lexicographic /// suffix. e.g., Given the choice between `a` and `aa`, this will choose /// the latter over the former, even though `a < aa`. The reasoning for /// this isn't clear from the paper, but it still smells like a minimal /// suffix. Minimal, /// Extract the largest lexicographic suffix from a string. /// /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given /// the choice between `z` and `zz`, this will choose the latter over the /// former. Maximal, } /// The result of comparing corresponding bytes between two suffixes. #[derive(Clone, Copy, Debug)] enum SuffixOrdering { /// This occurs when the given candidate byte indicates that the candidate /// suffix is better than the current maximal (or minimal) suffix. That is, /// the current candidate suffix should supplant the current maximal (or /// minimal) suffix. Accept, /// This occurs when the given candidate byte excludes the candidate suffix /// from being better than the current maximal (or minimal) suffix. That /// is, the current candidate suffix should be dropped and the next one /// should be considered. Skip, /// This occurs when no decision to accept or skip the candidate suffix /// can be made, e.g., when corresponding bytes are equivalent. In this /// case, the next corresponding bytes should be compared. 
Push, } impl SuffixKind { /// Returns true if and only if the given candidate byte indicates that /// it should replace the current suffix as the maximal (or minimal) /// suffix. fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering { use self::SuffixOrdering::*; match self { SuffixKind::Minimal if candidate < current => Accept, SuffixKind::Minimal if candidate > current => Skip, SuffixKind::Minimal => Push, SuffixKind::Maximal if candidate > current => Accept, SuffixKind::Maximal if candidate < current => Skip, SuffixKind::Maximal => Push, } } } /// A bitset used to track whether a particular byte exists in a needle or not. /// /// Namely, bit 'i' is set if and only if byte%64==i for any byte in the /// needle. If a particular byte in the haystack is NOT in this set, then one /// can conclude that it is also not in the needle, and thus, one can advance /// in the haystack by needle.len() bytes. #[derive(Clone, Copy, Debug)] struct ApproximateByteSet(u64); impl ApproximateByteSet { /// Create a new set from the given needle. fn new(needle: &[u8]) -> ApproximateByteSet { let mut bits = 0; for &b in needle { bits |= 1 << (b % 64); } ApproximateByteSet(bits) } /// Return true if and only if the given byte might be in this set. This /// may return a false positive, but will never return a false negative. #[inline(always)] fn contains(&self, byte: u8) -> bool { self.0 & (1 << (byte % 64)) != 0 } } #[cfg(all(test, feature = "std", not(miri)))] mod tests { use quickcheck::quickcheck; use super::*; define_memmem_quickcheck_tests!( super::simpletests::twoway_find, super::simpletests::twoway_rfind ); /// Convenience wrapper for computing the suffix as a byte string. fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { let s = Suffix::forward(needle, kind); (&needle[s.pos..], s.period) } /// Convenience wrapper for computing the reverse suffix as a byte string. fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { let s = Suffix::reverse(needle, kind); (&needle[..s.pos], s.period) } /// Return all of the non-empty suffixes in the given byte string. fn suffixes(bytes: &[u8]) -> Vec<&[u8]> { (0..bytes.len()).map(|i| &bytes[i..]).collect() } /// Return the lexicographically maximal suffix of the given byte string. fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] { let mut sufs = suffixes(needle); sufs.sort(); sufs.pop().unwrap() } /// Return the lexicographically maximal suffix of the reverse of the given /// byte string. fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec { let mut reversed = needle.to_vec(); reversed.reverse(); let mut got = naive_maximal_suffix_forward(&reversed).to_vec(); got.reverse(); got } #[test] fn suffix_forward() { macro_rules! assert_suffix_min { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); let got_suffix = std::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } macro_rules! 
assert_suffix_max { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); let got_suffix = std::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } assert_suffix_min!("a", "a", 1); assert_suffix_max!("a", "a", 1); assert_suffix_min!("ab", "ab", 2); assert_suffix_max!("ab", "b", 1); assert_suffix_min!("ba", "a", 1); assert_suffix_max!("ba", "ba", 2); assert_suffix_min!("abc", "abc", 3); assert_suffix_max!("abc", "c", 1); assert_suffix_min!("acb", "acb", 3); assert_suffix_max!("acb", "cb", 2); assert_suffix_min!("cba", "a", 1); assert_suffix_max!("cba", "cba", 3); assert_suffix_min!("abcabc", "abcabc", 3); assert_suffix_max!("abcabc", "cabc", 3); assert_suffix_min!("abcabcabc", "abcabcabc", 3); assert_suffix_max!("abcabcabc", "cabcabc", 3); assert_suffix_min!("abczz", "abczz", 5); assert_suffix_max!("abczz", "zz", 1); assert_suffix_min!("zzabc", "abc", 3); assert_suffix_max!("zzabc", "zzabc", 5); assert_suffix_min!("aaa", "aaa", 1); assert_suffix_max!("aaa", "aaa", 1); assert_suffix_min!("foobar", "ar", 2); assert_suffix_max!("foobar", "r", 1); } #[test] fn suffix_reverse() { macro_rules! assert_suffix_min { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); let got_suffix = std::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } macro_rules! assert_suffix_max { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); let got_suffix = std::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } assert_suffix_min!("a", "a", 1); assert_suffix_max!("a", "a", 1); assert_suffix_min!("ab", "a", 1); assert_suffix_max!("ab", "ab", 2); assert_suffix_min!("ba", "ba", 2); assert_suffix_max!("ba", "b", 1); assert_suffix_min!("abc", "a", 1); assert_suffix_max!("abc", "abc", 3); assert_suffix_min!("acb", "a", 1); assert_suffix_max!("acb", "ac", 2); assert_suffix_min!("cba", "cba", 3); assert_suffix_max!("cba", "c", 1); assert_suffix_min!("abcabc", "abca", 3); assert_suffix_max!("abcabc", "abcabc", 3); assert_suffix_min!("abcabcabc", "abcabca", 3); assert_suffix_max!("abcabcabc", "abcabcabc", 3); assert_suffix_min!("abczz", "a", 1); assert_suffix_max!("abczz", "abczz", 5); assert_suffix_min!("zzabc", "zza", 3); assert_suffix_max!("zzabc", "zz", 1); assert_suffix_min!("aaa", "aaa", 1); assert_suffix_max!("aaa", "aaa", 1); } quickcheck! 
{ fn qc_suffix_forward_maximal(bytes: Vec) -> bool { if bytes.is_empty() { return true; } let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal); let expected = naive_maximal_suffix_forward(&bytes); got == expected } fn qc_suffix_reverse_maximal(bytes: Vec) -> bool { if bytes.is_empty() { return true; } let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal); let expected = naive_maximal_suffix_reverse(&bytes); expected == got } } } #[cfg(test)] mod simpletests { use super::*; pub(crate) fn twoway_find( haystack: &[u8], needle: &[u8], ) -> Option { Forward::new(needle).find_general(None, haystack, needle) } pub(crate) fn twoway_rfind( haystack: &[u8], needle: &[u8], ) -> Option { Reverse::new(needle).rfind_general(haystack, needle) } define_memmem_simple_tests!(twoway_find, twoway_rfind); // This is a regression test caught by quickcheck that exercised a bug in // the reverse small period handling. The bug was that we were using 'if j // == shift' to determine if a match occurred, but the correct guard is 'if // j >= shift', which matches the corresponding guard in the forward impl. #[test] fn regression_rev_small_period() { let rfind = super::simpletests::twoway_rfind; let haystack = "ababaz"; let needle = "abab"; assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes())); } } memchr-2.4.1/src/memmem/util.rs000064400000000000000000000073020072674642500145240ustar 00000000000000// These routines are meant to be optimized specifically for low latency as // compared to the equivalent routines offered by std. (Which may invoke the // dynamic linker and call out to libc, which introduces a bit more latency // than we'd like.) /// Returns true if and only if needle is a prefix of haystack. #[inline(always)] pub(crate) fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { needle.len() <= haystack.len() && memcmp(&haystack[..needle.len()], needle) } /// Returns true if and only if needle is a suffix of haystack. #[inline(always)] pub(crate) fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { needle.len() <= haystack.len() && memcmp(&haystack[haystack.len() - needle.len()..], needle) } /// Return true if and only if x.len() == y.len() && x[i] == y[i] for all /// 0 <= i < x.len(). /// /// Why not just use actual memcmp for this? Well, memcmp requires calling out /// to libc, and this routine is called in fairly hot code paths. Other than /// just calling out to libc, it also seems to result in worse codegen. By /// rolling our own memcmp in pure Rust, it seems to appear more friendly to /// the optimizer. /// /// We mark this as inline always, although, some callers may not want it /// inlined for better codegen (like Rabin-Karp). In that case, callers are /// advised to create a non-inlineable wrapper routine that calls memcmp. #[inline(always)] pub(crate) fn memcmp(x: &[u8], y: &[u8]) -> bool { if x.len() != y.len() { return false; } // If we don't have enough bytes to do 4-byte at a time loads, then // fall back to the naive slow version. // // TODO: We could do a copy_nonoverlapping combined with a mask instead // of a loop. Benchmark it. if x.len() < 4 { for (&b1, &b2) in x.iter().zip(y) { if b1 != b2 { return false; } } return true; } // When we have 4 or more bytes to compare, then proceed in chunks of 4 at // a time using unaligned loads. // // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is // that this particular version of memcmp is likely to be called with tiny // needles. 
That means that if we do 8 byte loads, then a higher proportion // of memcmp calls will use the slower variant above. With that said, this // is a hypothesis and is only loosely supported by benchmarks. There's // likely some improvement that could be made here. The main thing here // though is to optimize for latency, not throughput. // SAFETY: Via the conditional above, we know that both `px` and `py` // have the same length, so `px < pxend` implies that `py < pyend`. // Thus, dereferencing both `px` and `py` in the loop below is safe. // // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual // end of `px` and `py`. Thus, the final dereference outside of the // loop is guaranteed to be valid. (The final comparison will overlap with // the last comparison done in the loop for lengths that aren't multiples // of four.) // // Finally, we needn't worry about alignment here, since we do unaligned // loads. unsafe { let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); while px < pxend { let vx = (px as *const u32).read_unaligned(); let vy = (py as *const u32).read_unaligned(); if vx != vy { return false; } px = px.add(4); py = py.add(4); } let vx = (pxend as *const u32).read_unaligned(); let vy = (pyend as *const u32).read_unaligned(); vx == vy } } memchr-2.4.1/src/memmem/vector.rs000064400000000000000000000062530072674642500150550ustar 00000000000000/// A trait for describing vector operations used by vectorized searchers. /// /// The trait is highly constrained to low level vector operations needed. In /// general, it was invented mostly to be generic over x86's __m128i and /// __m256i types. It's likely that once std::simd becomes a thing, we can /// migrate to that since the operations required are quite simple. /// /// TODO: Consider moving this trait up a level and using it to implement /// memchr as well. The trait might need to grow one or two methods, but /// otherwise should be close to sufficient already. /// /// # Safety /// /// All methods are not safe since they are intended to be implemented using /// vendor intrinsics, which are also not safe. Callers must ensure that the /// appropriate target features are enabled in the calling function, and that /// the current CPU supports them. All implementations should avoid marking the /// routines with #[target_feature] and instead mark them as #[inline(always)] /// to ensure they get appropriately inlined. (inline(always) cannot be used /// with target_feature.)
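///
/// As a rough sketch (an illustration, not an API exposed by this crate;
/// `find_byte` is a hypothetical helper), a searcher that is generic over
/// this trait combines the operations below roughly like so, where `V` is
/// any `Vector` implementation:
///
/// ```ignore
/// // Safety: the caller must ensure the CPU supports the vector type `V`.
/// unsafe fn find_byte<V: Vector>(haystack: &[u8], byte: u8) -> Option<usize> {
///     let vector_len = core::mem::size_of::<V>();
///     // Broadcast the needle byte into every lane once, up front.
///     let needle = V::splat(byte);
///     let mut i = 0;
///     while i + vector_len <= haystack.len() {
///         // Load one vector's worth of haystack and compare it lane-wise
///         // against the needle. `movemask` collapses the comparison into a
///         // bitmask with one bit per byte lane.
///         let chunk = V::load_unaligned(haystack.as_ptr().add(i));
///         let mask = chunk.cmpeq(needle).movemask();
///         if mask != 0 {
///             // The least significant set bit marks the first match.
///             return Some(i + mask.trailing_zeros() as usize);
///         }
///         i += vector_len;
///     }
///     // A real searcher also handles the final partial chunk, e.g. with an
///     // overlapping load that ends exactly at `haystack.len()`.
///     None
/// }
/// ```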
pub(crate) trait Vector: Copy + core::fmt::Debug { /// _mm_set1_epi8 or _mm256_set1_epi8 unsafe fn splat(byte: u8) -> Self; /// _mm_loadu_si128 or _mm256_loadu_si256 unsafe fn load_unaligned(data: *const u8) -> Self; /// _mm_movemask_epi8 or _mm256_movemask_epi8 unsafe fn movemask(self) -> u32; /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8 unsafe fn cmpeq(self, vector2: Self) -> Self; /// _mm_and_si128 or _mm256_and_si256 unsafe fn and(self, vector2: Self) -> Self; } #[cfg(target_arch = "x86_64")] mod x86sse { use super::Vector; use core::arch::x86_64::*; impl Vector for __m128i { #[inline(always)] unsafe fn splat(byte: u8) -> __m128i { _mm_set1_epi8(byte as i8) } #[inline(always)] unsafe fn load_unaligned(data: *const u8) -> __m128i { _mm_loadu_si128(data as *const __m128i) } #[inline(always)] unsafe fn movemask(self) -> u32 { _mm_movemask_epi8(self) as u32 } #[inline(always)] unsafe fn cmpeq(self, vector2: Self) -> __m128i { _mm_cmpeq_epi8(self, vector2) } #[inline(always)] unsafe fn and(self, vector2: Self) -> __m128i { _mm_and_si128(self, vector2) } } } #[cfg(all(feature = "std", target_arch = "x86_64"))] mod x86avx { use super::Vector; use core::arch::x86_64::*; impl Vector for __m256i { #[inline(always)] unsafe fn splat(byte: u8) -> __m256i { _mm256_set1_epi8(byte as i8) } #[inline(always)] unsafe fn load_unaligned(data: *const u8) -> __m256i { _mm256_loadu_si256(data as *const __m256i) } #[inline(always)] unsafe fn movemask(self) -> u32 { _mm256_movemask_epi8(self) as u32 } #[inline(always)] unsafe fn cmpeq(self, vector2: Self) -> __m256i { _mm256_cmpeq_epi8(self, vector2) } #[inline(always)] unsafe fn and(self, vector2: Self) -> __m256i { _mm256_and_si256(self, vector2) } } } memchr-2.4.1/src/memmem/x86/avx.rs000064400000000000000000000104600072674642500147710ustar 00000000000000#[cfg(not(feature = "std"))] pub(crate) use self::nostd::Forward; #[cfg(feature = "std")] pub(crate) use self::std::Forward; #[cfg(feature = "std")] mod std { use core::arch::x86_64::{__m128i, __m256i}; use crate::memmem::{genericsimd, NeedleInfo}; /// An AVX accelerated vectorized substring search routine that only works /// on small needles. #[derive(Clone, Copy, Debug)] pub(crate) struct Forward(genericsimd::Forward); impl Forward { /// Create a new "generic simd" forward searcher. If one could not be /// created from the given inputs, then None is returned. pub(crate) fn new( ninfo: &NeedleInfo, needle: &[u8], ) -> Option<Forward> { if !cfg!(memchr_runtime_avx) || !is_x86_feature_detected!("avx2") { return None; } genericsimd::Forward::new(ninfo, needle).map(Forward) } /// Returns the minimum length of haystack that is needed for this /// searcher to work. Passing a haystack with a length smaller than /// this will cause `find` to panic. #[inline(always)] pub(crate) fn min_haystack_len(&self) -> usize { self.0.min_haystack_len::<__m128i>() } #[inline(always)] pub(crate) fn find( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize> { // SAFETY: The only way a Forward value can exist is if the avx2 // target feature is enabled. This is the only safety requirement // for calling the genericsimd searcher. unsafe { self.find_impl(haystack, needle) } } /// The implementation of find marked with the appropriate target /// feature. /// /// # Safety /// /// Callers must ensure that the avx2 CPU feature is enabled in the /// current environment.
#[target_feature(enable = "avx2")] unsafe fn find_impl( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize> { if haystack.len() < self.0.min_haystack_len::<__m256i>() { genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) } else { genericsimd::fwd_find::<__m256i>(&self.0, haystack, needle) } } } } // We still define the avx "forward" type on nostd to make caller code a bit // simpler. This avoids needing a lot more conditional compilation. #[cfg(not(feature = "std"))] mod nostd { use crate::memmem::NeedleInfo; #[derive(Clone, Copy, Debug)] pub(crate) struct Forward(()); impl Forward { pub(crate) fn new( ninfo: &NeedleInfo, needle: &[u8], ) -> Option<Forward> { None } pub(crate) fn min_haystack_len(&self) -> usize { unreachable!() } pub(crate) fn find( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize> { unreachable!() } } } #[cfg(all(test, feature = "std", not(miri)))] mod tests { use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; fn find( _: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option<usize> { super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) } #[test] fn prefilter_permutations() { use crate::memmem::prefilter::tests::PrefilterTest; if !is_x86_feature_detected!("avx2") { return; } // SAFETY: The safety of find only requires that the current CPU // support AVX2, which we checked above. unsafe { PrefilterTest::run_all_tests_filter(find, |t| { // This substring searcher only works on certain configs, so // filter our tests such that Forward::new will be guaranteed // to succeed. (And also remove tests with a haystack that is // too small.) let fwd = match super::Forward::new(&t.ninfo, &t.needle) { None => return false, Some(fwd) => fwd, }; t.haystack.len() >= fwd.min_haystack_len() }) } } } memchr-2.4.1/src/memmem/x86/mod.rs000064400000000000000000000000500072674642500147460ustar 00000000000000pub(crate) mod avx; pub(crate) mod sse; memchr-2.4.1/src/memmem/x86/sse.rs000064400000000000000000000055400072674642500147700ustar 00000000000000use core::arch::x86_64::__m128i; use crate::memmem::{genericsimd, NeedleInfo}; /// An SSE accelerated vectorized substring search routine that only works on /// small needles. #[derive(Clone, Copy, Debug)] pub(crate) struct Forward(genericsimd::Forward); impl Forward { /// Create a new "generic simd" forward searcher. If one could not be /// created from the given inputs, then None is returned. pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option<Forward> { if !cfg!(memchr_runtime_sse2) { return None; } genericsimd::Forward::new(ninfo, needle).map(Forward) } /// Returns the minimum length of haystack that is needed for this searcher /// to work. Passing a haystack with a length smaller than this will cause /// `find` to panic. #[inline(always)] pub(crate) fn min_haystack_len(&self) -> usize { self.0.min_haystack_len::<__m128i>() } #[inline(always)] pub(crate) fn find( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize> { // SAFETY: sse2 is enabled on all x86_64 targets, so this is always // safe to call. unsafe { self.find_impl(haystack, needle) } } /// The implementation of find marked with the appropriate target feature. /// /// # Safety /// /// This is safe to call in all cases since sse2 is guaranteed to be part /// of x86_64. It is marked as unsafe because of the target feature /// attribute.
#[target_feature(enable = "sse2")] unsafe fn find_impl( &self, haystack: &[u8], needle: &[u8], ) -> Option<usize> { genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) } } #[cfg(all(test, feature = "std", not(miri)))] mod tests { use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; fn find( _: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option<usize> { super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) } #[test] fn prefilter_permutations() { use crate::memmem::prefilter::tests::PrefilterTest; // SAFETY: sse2 is enabled on all x86_64 targets, so this is always // safe to call. unsafe { PrefilterTest::run_all_tests_filter(find, |t| { // This substring searcher only works on certain configs, so // filter our tests such that Forward::new will be guaranteed // to succeed. (And also remove tests with a haystack that is // too small.) let fwd = match super::Forward::new(&t.ninfo, &t.needle) { None => return false, Some(fwd) => fwd, }; t.haystack.len() >= fwd.min_haystack_len() }) } } } memchr-2.4.1/src/tests/memchr/iter.rs000064400000000000000000000141140072674642500156510ustar 00000000000000use quickcheck::quickcheck; use crate::{tests::memchr::testdata::memchr_tests, Memchr, Memchr2, Memchr3}; #[test] fn memchr1_iter() { for test in memchr_tests() { test.iter_one(false, Memchr::new); } } #[test] fn memchr2_iter() { for test in memchr_tests() { test.iter_two(false, Memchr2::new); } } #[test] fn memchr3_iter() { for test in memchr_tests() { test.iter_three(false, Memchr3::new); } } #[test] fn memrchr1_iter() { for test in memchr_tests() { test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev()); } } #[test] fn memrchr2_iter() { for test in memchr_tests() { test.iter_two(true, |n1, n2, corpus| { Memchr2::new(n1, n2, corpus).rev() }) } } #[test] fn memrchr3_iter() { for test in memchr_tests() { test.iter_three(true, |n1, n2, n3, corpus| { Memchr3::new(n1, n2, n3, corpus).rev() }) } } quickcheck!
{ fn qc_memchr_double_ended_iter( needle: u8, data: Vec<u8>, take_side: Vec<bool> ) -> bool { // make nonempty let mut take_side = take_side; if take_side.is_empty() { take_side.push(true) }; let iter = Memchr::new(needle, &data); let all_found = double_ended_take( iter, take_side.iter().cycle().cloned()); all_found.iter().cloned().eq(positions1(needle, &data)) } fn qc_memchr2_double_ended_iter( needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool> ) -> bool { // make nonempty let mut take_side = take_side; if take_side.is_empty() { take_side.push(true) }; let iter = Memchr2::new(needle1, needle2, &data); let all_found = double_ended_take( iter, take_side.iter().cycle().cloned()); all_found.iter().cloned().eq(positions2(needle1, needle2, &data)) } fn qc_memchr3_double_ended_iter( needle1: u8, needle2: u8, needle3: u8, data: Vec<u8>, take_side: Vec<bool> ) -> bool { // make nonempty let mut take_side = take_side; if take_side.is_empty() { take_side.push(true) }; let iter = Memchr3::new(needle1, needle2, needle3, &data); let all_found = double_ended_take( iter, take_side.iter().cycle().cloned()); all_found .iter() .cloned() .eq(positions3(needle1, needle2, needle3, &data)) } fn qc_memchr1_iter(data: Vec<u8>) -> bool { let needle = 0; let answer = positions1(needle, &data); answer.eq(Memchr::new(needle, &data)) } fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool { let needle = 0; let answer = positions1(needle, &data); answer.rev().eq(Memchr::new(needle, &data).rev()) } fn qc_memchr2_iter(data: Vec<u8>) -> bool { let needle1 = 0; let needle2 = 1; let answer = positions2(needle1, needle2, &data); answer.eq(Memchr2::new(needle1, needle2, &data)) } fn qc_memchr2_rev_iter(data: Vec<u8>) -> bool { let needle1 = 0; let needle2 = 1; let answer = positions2(needle1, needle2, &data); answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev()) } fn qc_memchr3_iter(data: Vec<u8>) -> bool { let needle1 = 0; let needle2 = 1; let needle3 = 2; let answer = positions3(needle1, needle2, needle3, &data); answer.eq(Memchr3::new(needle1, needle2, needle3, &data)) } fn qc_memchr3_rev_iter(data: Vec<u8>) -> bool { let needle1 = 0; let needle2 = 1; let needle3 = 2; let answer = positions3(needle1, needle2, needle3, &data); answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev()) } fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool { // test that the size hint is within reasonable bounds let needle = 0; let mut iter = Memchr::new(needle, &data); let mut real_count = data .iter() .filter(|&&elt| elt == needle) .count(); while let Some(index) = iter.next() { real_count -= 1; let (lower, upper) = iter.size_hint(); assert!(lower <= real_count); assert!(upper.unwrap() >= real_count); assert!(upper.unwrap() <= data.len() - index); } true } } // take items from a DEI, taking front for each true and back for each false. // Return a vector with the concatenation of the fronts and the reverse of the // backs.
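// For example (illustrative numbers only): if the iterator yields the
// positions 0, 2, 4 and take_side cycles through [true, false, true], then
// next() returns 0, next_back() returns 4 and next() returns 2, so the
// fronts are [0, 2], the backs are [4], and the final result is [0, 2, 4],
// i.e. the same order as plain forward iteration, which is exactly what the
// properties above compare against.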
fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item> where I: DoubleEndedIterator, J: Iterator<Item = bool>, { let mut found_front = Vec::new(); let mut found_back = Vec::new(); for take_front in take_side { if take_front { if let Some(pos) = iter.next() { found_front.push(pos); } else { break; } } else { if let Some(pos) = iter.next_back() { found_back.push(pos); } else { break; } }; } let mut all_found = found_front; all_found.extend(found_back.into_iter().rev()); all_found } // return an iterator of the 0-based indices of haystack that match the needle fn positions1<'a>( n1: u8, haystack: &'a [u8], ) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { let it = haystack .iter() .enumerate() .filter(move |&(_, &b)| b == n1) .map(|t| t.0); Box::new(it) } fn positions2<'a>( n1: u8, n2: u8, haystack: &'a [u8], ) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { let it = haystack .iter() .enumerate() .filter(move |&(_, &b)| b == n1 || b == n2) .map(|t| t.0); Box::new(it) } fn positions3<'a>( n1: u8, n2: u8, n3: u8, haystack: &'a [u8], ) -> Box<dyn DoubleEndedIterator<Item = usize> + 'a> { let it = haystack .iter() .enumerate() .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) .map(|t| t.0); Box::new(it) } memchr-2.4.1/src/tests/memchr/memchr.rs000064400000000000000000000050470072674642500161660ustar 00000000000000use quickcheck::quickcheck; use crate::{ memchr, memchr::{fallback, naive}, memchr2, memchr3, memrchr, memrchr2, memrchr3, tests::memchr::testdata::memchr_tests, }; #[test] fn memchr1_find() { for test in memchr_tests() { test.one(false, memchr); } } #[test] fn memchr1_fallback_find() { for test in memchr_tests() { test.one(false, fallback::memchr); } } #[test] fn memchr2_find() { for test in memchr_tests() { test.two(false, memchr2); } } #[test] fn memchr2_fallback_find() { for test in memchr_tests() { test.two(false, fallback::memchr2); } } #[test] fn memchr3_find() { for test in memchr_tests() { test.three(false, memchr3); } } #[test] fn memchr3_fallback_find() { for test in memchr_tests() { test.three(false, fallback::memchr3); } } #[test] fn memrchr1_find() { for test in memchr_tests() { test.one(true, memrchr); } } #[test] fn memrchr1_fallback_find() { for test in memchr_tests() { test.one(true, fallback::memrchr); } } #[test] fn memrchr2_find() { for test in memchr_tests() { test.two(true, memrchr2); } } #[test] fn memrchr2_fallback_find() { for test in memchr_tests() { test.two(true, fallback::memrchr2); } } #[test] fn memrchr3_find() { for test in memchr_tests() { test.three(true, memrchr3); } } #[test] fn memrchr3_fallback_find() { for test in memchr_tests() { test.three(true, fallback::memrchr3); } } quickcheck! { fn qc_memchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool { memchr(n1, &corpus) == naive::memchr(n1, &corpus) } } quickcheck! { fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool { memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus) } } quickcheck! { fn qc_memchr3_matches_naive( n1: u8, n2: u8, n3: u8, corpus: Vec<u8> ) -> bool { memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus) } } quickcheck! { fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec<u8>) -> bool { memrchr(n1, &corpus) == naive::memrchr(n1, &corpus) } } quickcheck! { fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> bool { memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus) } } quickcheck!
{ fn qc_memrchr3_matches_naive( n1: u8, n2: u8, n3: u8, corpus: Vec<u8> ) -> bool { memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus) } } memchr-2.4.1/src/tests/memchr/mod.rs000064400000000000000000000002500072674642500154610ustar 00000000000000#[cfg(all(feature = "std", not(miri)))] mod iter; #[cfg(all(feature = "std", not(miri)))] mod memchr; mod simple; #[cfg(all(feature = "std", not(miri)))] mod testdata; memchr-2.4.1/src/tests/memchr/simple.rs000064400000000000000000000020360072674642500161770ustar 00000000000000// Simple tests using MIRI. These are intended only to be a simple exercise of // memchr when tests are run under miri. These are mostly necessary because the // other tests are far more extensive and take too long to run under miri. // // These tests are also run when the 'std' feature is not enabled. use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; #[test] fn simple() { assert_eq!(memchr(b'a', b"abcda"), Some(0)); assert_eq!(memchr(b'z', b"abcda"), None); assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0)); assert_eq!(memchr2(b'z', b'y', b"abcda"), None); assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0)); assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None); assert_eq!(memrchr(b'a', b"abcda"), Some(4)); assert_eq!(memrchr(b'z', b"abcda"), None); assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4)); assert_eq!(memrchr2(b'z', b'y', b"abcda"), None); assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4)); assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None); } memchr-2.4.1/src/tests/memchr/testdata.rs000064400000000000000000000301630072674642500165210ustar 00000000000000use std::iter::repeat; /// Create a sequence of tests that should be run by memchr implementations. pub fn memchr_tests() -> Vec<MemchrTest> { let mut tests = Vec::new(); for statict in MEMCHR_TESTS { assert!(!statict.corpus.contains("%"), "% is not allowed in corpora"); assert!(!statict.corpus.contains("#"), "# is not allowed in corpora"); assert!(!statict.needles.contains(&b'%'), "% is an invalid needle"); assert!(!statict.needles.contains(&b'#'), "# is an invalid needle"); let t = MemchrTest { corpus: statict.corpus.to_string(), needles: statict.needles.to_vec(), positions: statict.positions.to_vec(), }; tests.push(t.clone()); tests.extend(t.expand()); } tests } /// A set of tests for memchr-like functions. /// /// These tests mostly try to cover the short string cases. We cover the longer /// string cases via the benchmarks (which are tests themselves), via /// quickcheck tests and via automatic expansion of each test case (by /// increasing the corpus size). Finally, we cover different alignment cases /// in the tests by varying the starting point of the slice.
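///
/// For example (an illustration, not an exhaustive description): a static
/// test with corpus "zza" and expected positions [2] is also exercised with
/// '%' padding added before and after the corpus, with the expected
/// positions shifted to match, and every resulting corpus is additionally
/// searched from a range of starting offsets so that differently aligned
/// loads are hit:
///
/// ```text
/// corpus: "zza"    positions: [2]   (original)
/// corpus: "%zza"   positions: [3]   (prefix padding)
/// corpus: "zza%"   positions: [2]   (suffix padding)
/// ```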
const MEMCHR_TESTS: &[MemchrTestStatic] = &[ // one needle (applied to memchr + memchr2 + memchr3) MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] }, MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] }, MemchrTestStatic { corpus: "aaa", needles: &[b'a'], positions: &[0, 1, 2], }, MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] }, MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] }, MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] }, MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] }, MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] }, MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] }, MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] }, MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] }, MemchrTestStatic { corpus: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1], }, MemchrTestStatic { corpus: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2], }, MemchrTestStatic { corpus: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16], }, MemchrTestStatic { corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[32], }, // two needles (applied to memchr2 + memchr3) MemchrTestStatic { corpus: "az", needles: &[b'a', b'z'], positions: &[0, 1], }, MemchrTestStatic { corpus: "az", needles: &[b'a', b'z'], positions: &[0, 1], }, MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] }, MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] }, MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] }, MemchrTestStatic { corpus: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5], }, MemchrTestStatic { corpus: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5], }, // three needles (applied to memchr3) MemchrTestStatic { corpus: "xyz", needles: &[b'x', b'y', b'z'], positions: &[0, 1, 2], }, MemchrTestStatic { corpus: "zxy", needles: &[b'x', b'y', b'z'], positions: &[0, 1, 2], }, MemchrTestStatic { corpus: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1], }, MemchrTestStatic { corpus: "zxy", needles: &[b't', b'a', b'z'], positions: &[0], }, MemchrTestStatic { corpus: "yxz", needles: &[b't', b'a', b'z'], positions: &[2], }, ]; /// A description of a test on a memchr like function. #[derive(Clone, Debug)] pub struct MemchrTest { /// The thing to search. We use `&str` instead of `&[u8]` because they /// are nicer to write in tests, and we don't miss much since memchr /// doesn't care about UTF-8. /// /// Corpora cannot contain either '%' or '#'. We use these bytes when /// expanding test cases into many test cases, and we assume they are not /// used. If they are used, `memchr_tests` will panic. corpus: String, /// The needles to search for. This is intended to be an "alternation" of /// needles. The number of needles may cause this test to be skipped for /// some memchr variants. For example, a test with 2 needles cannot be used /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. /// However, a test with only 1 needle can be used to test all of `memchr`, /// `memchr2` and `memchr3`. We achieve this by filling in the needles with /// bytes that we never used in the corpus (such as '#'). needles: Vec<u8>, /// The positions expected to match for all of the needles. positions: Vec<usize>, } /// Like MemchrTest, but easier to define as a constant.
#[derive(Clone, Debug)] pub struct MemchrTestStatic { corpus: &'static str, needles: &'static [u8], positions: &'static [usize], } impl MemchrTest { pub fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) { let needles = match self.needles(1) { None => return, Some(needles) => needles, }; // We test different alignments here. Since some implementations use // AVX2, which can read 32 bytes at a time, we test at least that. // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128 // (avx) bytes at a time, so we include that in our offsets as well. // // You might think this would cause most needles to not be found, but // we actually expand our tests to include corpus sizes all the way up // to >500 bytes, so we should exercise most branches. for align in 0..130 { let corpus = self.corpus(align); assert_eq!( self.positions(align, reverse).get(0).cloned(), f(needles[0], corpus.as_bytes()), "search for {:?} failed in: {:?} (len: {}, alignment: {})", needles[0] as char, corpus, corpus.len(), align ); } } pub fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>( &self, reverse: bool, f: F, ) { let needles = match self.needles(2) { None => return, Some(needles) => needles, }; for align in 0..130 { let corpus = self.corpus(align); assert_eq!( self.positions(align, reverse).get(0).cloned(), f(needles[0], needles[1], corpus.as_bytes()), "search for {:?}|{:?} failed in: {:?} \ (len: {}, alignment: {})", needles[0] as char, needles[1] as char, corpus, corpus.len(), align ); } } pub fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>( &self, reverse: bool, f: F, ) { let needles = match self.needles(3) { None => return, Some(needles) => needles, }; for align in 0..130 { let corpus = self.corpus(align); assert_eq!( self.positions(align, reverse).get(0).cloned(), f(needles[0], needles[1], needles[2], corpus.as_bytes()), "search for {:?}|{:?}|{:?} failed in: {:?} \ (len: {}, alignment: {})", needles[0] as char, needles[1] as char, needles[2] as char, corpus, corpus.len(), align ); } } pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F) where F: FnOnce(u8, &'a [u8]) -> I, I: Iterator<Item = usize>, { if let Some(ns) = self.needles(1) { self.iter(reverse, f(ns[0], self.corpus.as_bytes())); } } pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F) where F: FnOnce(u8, u8, &'a [u8]) -> I, I: Iterator<Item = usize>, { if let Some(ns) = self.needles(2) { self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes())); } } pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F) where F: FnOnce(u8, u8, u8, &'a [u8]) -> I, I: Iterator<Item = usize>, { if let Some(ns) = self.needles(3) { self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes())); } } /// Test that the positions yielded by the given iterator match the /// positions in this test. If reverse is true, then reverse the positions /// before comparing them. fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) { assert_eq!( self.positions(0, reverse), it.collect::<Vec<usize>>(), r"search for {:?} failed in: {:?}", self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(), self.corpus ); } /// Expand this test into many variations of the same test. /// /// In particular, this will generate more tests with larger corpus sizes. /// The expected positions are updated to maintain the integrity of the /// test. /// /// This is important in testing a memchr implementation, because there are /// often different cases depending on the length of the corpus. /// /// Note that we extend the corpus by adding `%` bytes, which we /// don't otherwise use as a needle. fn expand(&self) -> Vec<MemchrTest> { let mut more = Vec::new(); // Add bytes to the start of the corpus.
for i in 1..515 { let mut t = self.clone(); let mut new_corpus: String = repeat('%').take(i).collect(); new_corpus.push_str(&t.corpus); t.corpus = new_corpus; t.positions = t.positions.into_iter().map(|p| p + i).collect(); more.push(t); } // Add bytes to the end of the corpus. for i in 1..515 { let mut t = self.clone(); let padding: String = repeat('%').take(i).collect(); t.corpus.push_str(&padding); more.push(t); } more } /// Return the corpus at the given alignment. /// /// If the alignment exceeds the length of the corpus, then this returns /// an empty slice. fn corpus(&self, align: usize) -> &str { self.corpus.get(align..).unwrap_or("") } /// Return exactly `count` needles from this test. If this test has less /// than `count` needles, then add `#` until the number of needles /// matches `count`. If this test has more than `count` needles, then /// return `None` (because there is no way to use this test data for a /// search using fewer needles). fn needles(&self, count: usize) -> Option<Vec<u8>> { if self.needles.len() > count { return None; } let mut needles = self.needles.to_vec(); for _ in needles.len()..count { // we assume # is never used in tests. needles.push(b'#'); } Some(needles) } /// Return the positions in this test, reversed if `reverse` is true. /// /// If alignment is given, then all positions greater than or equal to that /// alignment are offset by the alignment. Positions less than the /// alignment are dropped. fn positions(&self, align: usize, reverse: bool) -> Vec<usize> { let positions = if reverse { let mut positions = self.positions.to_vec(); positions.reverse(); positions } else { self.positions.to_vec() }; positions .into_iter() .filter(|&p| p >= align) .map(|p| p - align) .collect() } } memchr-2.4.1/src/tests/mod.rs000064400000000000000000000005070072674642500142130ustar 00000000000000mod memchr; // For debugging, particularly in CI, print out the byte order of the current // target. #[cfg(all(feature = "std", target_endian = "little"))] #[test] fn byte_order() { eprintln!("LITTLE ENDIAN"); } #[cfg(all(feature = "std", target_endian = "big"))] #[test] fn byte_order() { eprintln!("BIG ENDIAN"); } memchr-2.4.1/src/tests/x86_64-soft_float.json000064400000000000000000000010070072674642500170510ustar 00000000000000{ "llvm-target": "x86_64-unknown-none", "target-endian": "little", "target-pointer-width": "64", "target-c-int-width": "32", "os": "none", "arch": "x86_64", "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", "linker-flavor": "ld.lld", "linker": "rust-lld", "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", "executables": true, "disable-redzone": true, "panic-strategy": "abort" }