)
# Optional components: each subdirectory below is only added when the
# corresponding RAPIDFUZZ_BUILD_* option evaluates to true.

# Tests: additionally require NOT_SUBPROJECT to be true.
if(RAPIDFUZZ_BUILD_TESTING AND NOT_SUBPROJECT)
    include(CTest)
    enable_testing()
    add_subdirectory(test)
endif()

# Examples: the subdirectory is currently not added (call is commented out).
if(RAPIDFUZZ_BUILD_EXAMPLES)
    #add_subdirectory(examples)
endif()

# Benchmarks
if(RAPIDFUZZ_BUILD_BENCHMARKS)
    add_subdirectory(bench)
endif()

# Fuzz tests
if(RAPIDFUZZ_BUILD_FUZZERS)
    add_subdirectory(fuzzing)
endif()
# Installation, CMake package-config generation and CPack setup.
# Everything in this block is skipped unless RAPIDFUZZ_INSTALL is enabled.
if(RAPIDFUZZ_INSTALL)
    # Directory (relative to the install prefix) that receives the exported
    # targets file and the generated Config/ConfigVersion files.
    set(RAPIDFUZZ_CMAKE_CONFIG_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/rapidfuzz")

    # Install the `rapidfuzz` target and record it in the rapidfuzzTargets export set.
    install(
        TARGETS rapidfuzz
        EXPORT rapidfuzzTargets
        DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    )

    # Install the export set; consumers see the target as rapidfuzz::rapidfuzz.
    install(
        EXPORT rapidfuzzTargets
        NAMESPACE rapidfuzz::
        DESTINATION "${RAPIDFUZZ_CMAKE_CONFIG_DESTINATION}"
    )

    # Install the headers: only *.hpp and *.impl files from the rapidfuzz/ directory.
    install(
        DIRECTORY rapidfuzz
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
        FILES_MATCHING
            PATTERN "*.hpp"
            PATTERN "*.impl"
    )

    # Generate <Project>Config.cmake from its template.
    configure_package_config_file(
        "${CMAKE_CURRENT_LIST_DIR}/cmake/${PROJECT_NAME}Config.cmake.in"
        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
        INSTALL_DESTINATION "${RAPIDFUZZ_CMAKE_CONFIG_DESTINATION}"
    )

    # Generate <Project>ConfigVersion.cmake; packages are compatible within a major version.
    write_basic_package_version_file(
        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
        COMPATIBILITY SameMajorVersion
    )

    install(
        FILES
            "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
            "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
        DESTINATION "${RAPIDFUZZ_CMAKE_CONFIG_DESTINATION}"
    )

    # CPack/CMake started taking the package version from project version 3.12
    # So we need to set the version manually for older CMake versions
    # (CMAKE_VERSION is passed by name: if() dereferences it itself, so expanding
    # it with ${} first is unnecessary.)
    if(CMAKE_VERSION VERSION_LESS "3.12.0")
        set(CPACK_PACKAGE_VERSION "${PROJECT_VERSION}")
    endif()

    set(CPACK_PACKAGE_VENDOR "Max Bachmann")
    set(CPACK_PACKAGE_CONTACT "https://github.com/rapidfuzz/rapidfuzz-cpp")
    include(CPack)
endif()
rapidfuzz-cpp-3.3.1/Doxyfile 0000664 0000000 0000000 00000006664 14744034430 0016041 0 ustar 00root root 0000000 0000000 # Doxyfile 1.8.20
PROJECT_NAME = RapidFuzz
OUTPUT_DIRECTORY = doxygen
# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
# to include (a tag file for) the STL sources as input, then you should set this
# tag to YES in order to let doxygen match functions declarations and
# definitions whose arguments contain STL classes (e.g. func(std::string);
# versus func(std::string) {}). This also makes the inheritance and collaboration
# diagrams that involve STL classes more complete and accurate.
# The default value is: NO.
BUILTIN_STL_SUPPORT = YES
EXTRACT_PRIVATE = YES
EXTRACT_STATIC = YES
HIDE_UNDOC_MEMBERS = YES
HIDE_UNDOC_CLASSES = YES
# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
# their full class and namespace scopes in the documentation. If set to YES, the
# scope will be hidden.
# The default value is: NO.
HIDE_SCOPE_NAMES = NO
# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
# append additional text to a page's title, such as Class Reference. If set to
# YES the compound reference will be hidden.
# The default value is: NO.
HIDE_COMPOUND_REFERENCE= NO
# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
# the files that are included by a file in the documentation of that file.
# The default value is: YES.
SHOW_INCLUDE_FILES = YES
# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
# grouped member an include statement to the documentation, telling the reader
# which file to include in order to use the member.
# The default value is: NO.
SHOW_GROUPED_MEMB_INC = YES
# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
GENERATE_TODOLIST = NO
SHOW_FILES = NO
# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
# the reference definitions. This must be a list of .bib files. The .bib
# extension is automatically appended if omitted. This requires the bibtex tool
# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
# For LaTeX the style of the bibliography can be controlled using
# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
# search path. See also \cite for info how to create references.
CITE_BIB_FILES = docs/literature/hyrro_lcs_2004 \
docs/literature/hyrro_2002 \
docs/literature/hyrro_2004 \
docs/literature/myers_1999 \
docs/literature/wagner_fischer_1974
EXTRA_PACKAGES = amsmath xr amsfonts
#---------------------------------------------------------------------------
# Configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = rapidfuzz
FILE_PATTERNS = *.c \
*.cxx \
*.cpp \
*.h \
*.hpp \
*.md
#---------------------------------------------------------------------------
# Configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
HAVE_DOT = YES
rapidfuzz-cpp-3.3.1/LICENSE 0000664 0000000 0000000 00000002074 14744034430 0015327 0 ustar 00root root 0000000 0000000 Copyright © 2020 Max Bachmann
Copyright © 2011 Adam Cohen
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
rapidfuzz-cpp-3.3.1/README.md 0000664 0000000 0000000 00000021244 14744034430 0015601 0 ustar 00root root 0000000 0000000
Rapid fuzzy string matching in C++ using the Levenshtein Distance
Description •
Installation •
Usage •
License
---
## Description
RapidFuzz is a fast string matching library for Python and C++, which is using the string similarity calculations from [FuzzyWuzzy](https://github.com/seatgeek/fuzzywuzzy). However, there are two aspects that set RapidFuzz apart from FuzzyWuzzy:
1) It is MIT licensed so it can be used whichever License you might want to choose for your project, while you're forced to adopt the GPL license when using FuzzyWuzzy
2) It is mostly written in C++ and on top of this comes with a lot of Algorithmic improvements to make string matching even faster, while still providing the same results. More details on these performance improvements in the form of benchmarks can be found [here](https://github.com/rapidfuzz/rapidfuzz/blob/master/Benchmarks.md)
The Library is split across multiple repositories for the different supported programming languages:
- The C++ version is versioned in this repository
- The Python version can be found at [rapidfuzz/rapidfuzz](https://github.com/rapidfuzz/rapidfuzz)
## CMake Integration
There are several ways to integrate `rapidfuzz` in your CMake project.
### By Installing it
```bash
git clone https://github.com/rapidfuzz/rapidfuzz-cpp.git rapidfuzz-cpp
cd rapidfuzz-cpp
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build .
cmake --build . --target install
```
Then in your CMakeLists.txt:
```cmake
find_package(rapidfuzz REQUIRED)
add_executable(foo main.cpp)
target_link_libraries(foo rapidfuzz::rapidfuzz)
```
### Add this repository as a submodule
```bash
git submodule add https://github.com/rapidfuzz/rapidfuzz-cpp.git 3rdparty/RapidFuzz
```
Then you can either:
1. include it as a subdirectory
```cmake
add_subdirectory(3rdparty/RapidFuzz)
add_executable(foo main.cpp)
target_link_libraries(foo rapidfuzz::rapidfuzz)
```
2. build it at configure time with `FetchContent`
```cmake
FetchContent_Declare(
rapidfuzz
SOURCE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/RapidFuzz
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/rapidfuzz
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR> "${CMAKE_OPT_ARGS}"
)
FetchContent_MakeAvailable(rapidfuzz)
add_executable(foo main.cpp)
target_link_libraries(foo PRIVATE rapidfuzz::rapidfuzz)
```
### Download it at configure time
If you don't want to add `rapidfuzz-cpp` as a submodule, you can also download it with `FetchContent`:
```cmake
FetchContent_Declare(rapidfuzz
GIT_REPOSITORY https://github.com/rapidfuzz/rapidfuzz-cpp.git
GIT_TAG main)
FetchContent_MakeAvailable(rapidfuzz)
add_executable(foo main.cpp)
target_link_libraries(foo PRIVATE rapidfuzz::rapidfuzz)
```
It will be downloaded each time you run CMake in a blank folder.
## CMake option
There are CMake options available:
1. `RAPIDFUZZ_BUILD_TESTING` : to build tests (default OFF and requires [Catch2](https://github.com/catchorg/Catch2))
2. `RAPIDFUZZ_BUILD_BENCHMARKS` : to build benchmarks (default OFF and requires [Google Benchmark](https://github.com/google/benchmark))
3. `RAPIDFUZZ_INSTALL` : to install the library to local computer
- When configured independently, installation is on.
- When used as a subproject, the installation is turned off by default.
- For library developers, you might want to toggle the behavior depending on your project.
- If your project is exported via `CMake`, turn installation on or export error will result.
- If your project publicly depends on `RapidFuzz` (includes `rapidfuzz.hpp` in header),
turn installation on or apps depending on your project would face include errors.
## Usage
```cpp
#include <rapidfuzz/fuzz.hpp>
```
### Simple Ratio
```cpp
using rapidfuzz::fuzz::ratio;
// score is 96.55171966552734
double score = rapidfuzz::fuzz::ratio("this is a test", "this is a test!");
```
### Partial Ratio
```cpp
// score is 100
double score = rapidfuzz::fuzz::partial_ratio("this is a test", "this is a test!");
```
### Token Sort Ratio
```cpp
// score is 90.90908813476562
double score = rapidfuzz::fuzz::ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear");
// score is 100
double score = rapidfuzz::fuzz::token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear");
```
### Token Set Ratio
```cpp
// score is 83.8709716796875
double score = rapidfuzz::fuzz::token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear");
// score is 100
double score = rapidfuzz::fuzz::token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear");
```
### Process
In the Python implementation, there is a module process, which is used to compare e.g. a string to a list of strings.
In Python, this both saves the time to implement those features yourself and can be a lot more efficient than repeated type
conversions between Python and C++. Implementing a similar function in C++ using templates is not easily possible and probably slower than implementing them on your own. That's why this section describes how users can implement those features with a couple of lines of code using the C++ library.
### extract
The following function compares a query string to all strings in a list of choices. It returns all
elements with a similarity over score_cutoff. Generally make use of the cached implementations when comparing
a string to multiple strings.
```cpp
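template <typename Sentence1, typename Iterable, typename Sentence2 = typename Iterable::value_type>
std::vector<std::pair<Sentence2, double>>
extract(const Sentence1& query, const Iterable& choices, const double score_cutoff = 0.0)
{
std::vector<std::pair<Sentence2, double>> results;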
rapidfuzz::fuzz::CachedRatio scorer(query);
for (const auto& choice : choices) {
double score = scorer.similarity(choice, score_cutoff);
if (score >= score_cutoff) {
results.emplace_back(choice, score);
}
}
return results;
}
```
### extractOne
The following function compares a query string to all strings in a list of choices.
```cpp
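template <typename Sentence1, typename Iterable, typename Sentence2 = typename Iterable::value_type>
std::optional<std::pair<Sentence2, double>>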
extractOne(const Sentence1& query, const Iterable& choices, const double score_cutoff = 0.0)
{
bool match_found = false;
double best_score = score_cutoff;
Sentence2 best_match;
rapidfuzz::fuzz::CachedRatio scorer(query);
for (const auto& choice : choices) {
double score = scorer.similarity(choice, best_score);
if (score >= best_score) {
match_found = true;
best_score = score;
best_match = choice;
}
}
if (!match_found) {
return std::nullopt;
}
return std::make_pair(best_match, best_score);
}
```
### multithreading
It is very simple to use those scorers e.g. with OpenMP to achieve better performance.
```cpp
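template <typename Sentence1, typename Iterable, typename Sentence2 = typename Iterable::value_type>
std::vector<std::pair<Sentence2, double>>
extract(const Sentence1& query, const Iterable& choices, const double score_cutoff = 0.0)
{
std::vector<std::pair<Sentence2, double>> results(choices.size());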
rapidfuzz::fuzz::CachedRatio scorer(query);
#pragma omp parallel for
for (size_t i = 0; i < choices.size(); ++i) {
double score = scorer.similarity(choices[i], score_cutoff);
results[i] = std::make_pair(choices[i], score);
}
return results;
}
```
## License
RapidFuzz is licensed under the MIT license since I believe that everyone should be able to use it without being forced to adopt the GPL license. That's why the library is based on an older version of fuzzywuzzy that was MIT-licensed as well.
This old version of fuzzywuzzy can be found [here](https://github.com/seatgeek/fuzzywuzzy/tree/4bf28161f7005f3aa9d4d931455ac55126918df7).
rapidfuzz-cpp-3.3.1/SECURITY.md 0000664 0000000 0000000 00000001733 14744034430 0016114 0 ustar 00root root 0000000 0000000 ## Reporting Security Issues
If you believe you have found a security vulnerability in the project, please report it to us through coordinated disclosure.
**Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.**
Instead, please send an email to oss@maxbachmann.de.
Please include as much of the information listed below as you can to help us better understand and resolve the issue:
* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
rapidfuzz-cpp-3.3.1/bench/ 0000775 0000000 0000000 00000000000 14744034430 0015376 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/bench/CMakeLists.txt 0000664 0000000 0000000 00000001562 14744034430 0020142 0 ustar 00root root 0000000 0000000 include(FetchContent)
# Third-party dependencies of the benchmarks, fetched at configure time.
FetchContent_Declare(googletest
    GIT_REPOSITORY https://github.com/google/googletest.git
    GIT_TAG v1.12.x)
FetchContent_Declare(googlebenchmark
    GIT_REPOSITORY https://github.com/google/benchmark.git
    GIT_TAG main) # need master for benchmark::benchmark
FetchContent_MakeAvailable(
    googletest
    googlebenchmark)

# rapidfuzz_add_benchmark(<NAME> <SOURCE>)
#
# Creates the executable target `bench_<NAME>` from the single source file
# <SOURCE> and links it against ${PROJECT_NAME} and benchmark::benchmark.
#
# Example:
#   rapidfuzz_add_benchmark(lcs bench-lcs.cpp)   # -> target bench_lcs
function(rapidfuzz_add_benchmark NAME SOURCE)
    add_executable(bench_${NAME} ${SOURCE})
    # Tune for the host CPU. Applied per target instead of through the
    # directory-wide CMAKE_CXX_FLAGS, and skipped for MSVC, which does not
    # understand the GCC/Clang-style -march flag.
    target_compile_options(bench_${NAME} PRIVATE
        "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-march=native>")
    target_link_libraries(bench_${NAME} PRIVATE
        ${PROJECT_NAME}
        benchmark::benchmark)
endfunction()

rapidfuzz_add_benchmark(lcs bench-lcs.cpp)
rapidfuzz_add_benchmark(fuzz bench-fuzz.cpp)
rapidfuzz_add_benchmark(levenshtein bench-levenshtein.cpp)
rapidfuzz_add_benchmark(jarowinkler bench-jarowinkler.cpp)
rapidfuzz-cpp-3.3.1/bench/bench-fuzz.cpp 0000664 0000000 0000000 00000013171 14744034430 0020160 0 ustar 00root root 0000000 0000000 #include
#include
#include
#include
using rapidfuzz::fuzz::partial_ratio;
using rapidfuzz::fuzz::partial_token_ratio;
using rapidfuzz::fuzz::partial_token_set_ratio;
using rapidfuzz::fuzz::partial_token_sort_ratio;
using rapidfuzz::fuzz::ratio;
using rapidfuzz::fuzz::token_ratio;
using rapidfuzz::fuzz::token_set_ratio;
using rapidfuzz::fuzz::token_sort_ratio;
using rapidfuzz::fuzz::WRatio;
// ratio(): one string compared against itself.
static void BM_FuzzRatio1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// ratio(): two strings built from different letters.
static void BM_FuzzRatio2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzRatio1);
BENCHMARK(BM_FuzzRatio2);
// partial_ratio(): one string compared against itself.
static void BM_FuzzPartialRatio1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// partial_ratio(): two strings built from different letters.
static void BM_FuzzPartialRatio2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzPartialRatio1);
BENCHMARK(BM_FuzzPartialRatio2);
// token_sort_ratio(): one string compared against itself.
static void BM_FuzzTokenSort1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_sort_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// token_sort_ratio(): two strings built from different letters.
static void BM_FuzzTokenSort2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_sort_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzTokenSort1);
BENCHMARK(BM_FuzzTokenSort2);
// partial_token_sort_ratio(): one string compared against itself.
static void BM_FuzzPartialTokenSort1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_sort_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// partial_token_sort_ratio(): two strings built from different letters.
static void BM_FuzzPartialTokenSort2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_sort_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzPartialTokenSort1);
BENCHMARK(BM_FuzzPartialTokenSort2);
// token_set_ratio(): one string compared against itself.
static void BM_FuzzTokenSet1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_set_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// token_set_ratio(): two strings built from different letters.
static void BM_FuzzTokenSet2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_set_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzTokenSet1);
BENCHMARK(BM_FuzzTokenSet2);
// partial_token_set_ratio(): one string compared against itself.
static void BM_FuzzPartialTokenSet1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_set_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// partial_token_set_ratio(): two strings built from different letters.
static void BM_FuzzPartialTokenSet2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_set_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzPartialTokenSet1);
BENCHMARK(BM_FuzzPartialTokenSet2);
// token_ratio(): one string compared against itself.
static void BM_FuzzToken1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// token_ratio(): two strings built from different letters.
static void BM_FuzzToken2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(token_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzToken1);
BENCHMARK(BM_FuzzToken2);
// partial_token_ratio(): one string compared against itself.
static void BM_FuzzPartialToken1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_ratio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// partial_token_ratio(): two strings built from different letters.
static void BM_FuzzPartialToken2(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(partial_token_ratio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

BENCHMARK(BM_FuzzPartialToken1);
BENCHMARK(BM_FuzzPartialToken2);
// WRatio(): one string compared against itself.
static void BM_FuzzWRatio1(benchmark::State& state)
{
    std::wstring text(L"aaaaa aaaaa");

    for (auto _ : state) {
        benchmark::DoNotOptimize(WRatio(text, text));
    }

    state.SetLabel("Similar Strings");
}

// WRatio(): two equally long strings built from different letters.
static void BM_FuzzWRatio3(benchmark::State& state)
{
    std::wstring lhs(L"aaaaa aaaaa");
    std::wstring rhs(L"bbbbb bbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(WRatio(lhs, rhs));
    }

    state.SetLabel("Different Strings");
}

// WRatio(): a short string against a longer one.
static void BM_FuzzWRatio2(benchmark::State& state)
{
    std::wstring shorter(L"aaaaa b");
    std::wstring longer(L"bbbbb bbbbbbbbb");

    for (auto _ : state) {
        benchmark::DoNotOptimize(WRatio(shorter, longer));
    }

    state.SetLabel("Different length Strings");
}

BENCHMARK(BM_FuzzWRatio1);
BENCHMARK(BM_FuzzWRatio2);
BENCHMARK(BM_FuzzWRatio3);
BENCHMARK_MAIN();
rapidfuzz-cpp-3.3.1/bench/bench-jarowinkler.cpp 0000664 0000000 0000000 00000014666 14744034430 0021523 0 ustar 00root root 0000000 0000000 #include
#include
#include
#include
#include
/**
 * Build a random alphanumeric string.
 *
 * @param max_length number of characters to produce; values <= 0 yield an
 *                   empty string. Despite the name, the result always has
 *                   exactly this many characters.
 * @return a string whose characters are drawn uniformly from [0-9A-Za-z]
 *
 * A new engine is seeded from std::random_device on every call.
 */
std::string generate(int max_length)
{
    static const std::string possible_characters =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    std::random_device rd;
    std::mt19937 engine(rd());
    // inclusive index range [0, size - 1]
    std::uniform_int_distribution<> dist(0, static_cast<int>(possible_characters.size() - 1));

    std::string ret;
    if (max_length > 0) {
        ret.reserve(static_cast<size_t>(max_length));
    }
    for (int i = 0; i < max_length; i++) {
        int random_index = dist(engine);
        ret += possible_characters[static_cast<size_t>(random_index)];
    }
    return ret;
}
/**
 * Repeat a string.
 *
 * @param a the string to repeat
 * @param b how many copies of `a` to concatenate (0 yields an empty string)
 * @return `a` concatenated with itself `b` times
 */
template <typename CharT>
std::basic_string<CharT> str_multiply(std::basic_string<CharT> a, unsigned int b)
{
    std::basic_string<CharT> output;
    output.reserve(a.size() * b);
    while (b--)
        output += a;
    return output;
}
/**
 * Benchmark rapidfuzz::jaro_similarity on two long strings that differ only
 * in their first and last character ("ab...ba" vs. "bb...bb").
 *
 * state.range(0) is the string length and is expected to be >= 2.
 * The second benchmark argument is not used by this benchmark.
 * Counter "Rate" is (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_JaroLongSimilarSequence(benchmark::State& state)
{
    const size_t len = static_cast<size_t>(state.range(0));
    std::string s1 = std::string("a") + str_multiply(std::string("b"), static_cast<unsigned int>(len - 2)) +
                     std::string("a");
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));
    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::jaro_similarity(s1, s2));
        ++num;
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
/**
 * Benchmark rapidfuzz::jaro_similarity on two long strings without any
 * common character ("aa...a" vs. "bb...b").
 *
 * state.range(0) is the string length.
 * The second benchmark argument is not used by this benchmark.
 * Counter "Rate" is (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_JaroLongNonSimilarSequence(benchmark::State& state)
{
    const size_t len = static_cast<size_t>(state.range(0));
    std::string s1 = str_multiply(std::string("a"), static_cast<unsigned int>(len));
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));
    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::jaro_similarity(s1, s2));
        ++num;
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#ifdef RAPIDFUZZ_SIMD
/**
 * Benchmark the batched rapidfuzz::experimental::MultiJaro scorer.
 *
 * 64 random strings of length MaxLen1 are inserted into the scorer, which is
 * rebuilt on every iteration, and then compared against 10000 random strings
 * of length MaxLen2. Counter "Rate" is comparisons per second, "InvRate" its
 * inverse.
 *
 * NOTE(review): MultiJaro is assumed to be parameterized on the maximum
 * string length and to write double scores — confirm against the library.
 */
template <size_t MaxLen1, size_t MaxLen2>
static void BM_Jaro_SIMD(benchmark::State& state)
{
    std::vector<std::string> seq1;
    std::vector<std::string> seq2;
    std::vector<double> results(64);
    for (int i = 0; i < 64; i++)
        seq1.push_back(generate(static_cast<int>(MaxLen1)));
    for (int i = 0; i < 10000; i++)
        seq2.push_back(generate(static_cast<int>(MaxLen2)));
    size_t num = 0;
    for (auto _ : state) {
        rapidfuzz::experimental::MultiJaro<MaxLen1> scorer(seq1.size());
        for (const auto& str1 : seq1)
            scorer.insert(str1);
        for (const auto& str2 : seq2)
            scorer.similarity(&results[0], results.size(), str2);
        num += seq1.size() * seq2.size();
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#endif
template
static void BM_Jaro(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen1));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen2));
size_t num = 0;
for (auto _ : state) {
for (size_t j = 0; j < seq2.size(); ++j)
for (size_t i = 0; i < seq1.size(); ++i)
benchmark::DoNotOptimize(rapidfuzz::jaro_similarity(seq1[i], seq2[j]));
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
template
static void BM_Jaro_Cached(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen1));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen2));
size_t num = 0;
for (auto _ : state) {
for (const auto& str1 : seq1) {
rapidfuzz::CachedJaro scorer(str1);
for (size_t j = 0; j < seq2.size(); ++j)
benchmark::DoNotOptimize(scorer.similarity(seq2[j]));
}
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
// Benchmark registrations.
// The two template arguments are the lengths of the random strings in the
// first and second sequence.

// Both sequences use the same string length (8, 16, 32, 64).
BENCHMARK_TEMPLATE(BM_Jaro, 8, 8);
BENCHMARK_TEMPLATE(BM_Jaro, 16, 16);
BENCHMARK_TEMPLATE(BM_Jaro, 32, 32);
BENCHMARK_TEMPLATE(BM_Jaro, 64, 64);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 8, 8);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 16, 16);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 32, 32);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 64, 64);
#ifdef RAPIDFUZZ_SIMD
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 8, 8);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 16, 16);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 32, 32);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 64, 64);
#endif
// Short first-sequence strings against second-sequence strings of length 1000.
BENCHMARK_TEMPLATE(BM_Jaro, 8, 1000);
BENCHMARK_TEMPLATE(BM_Jaro, 16, 1000);
BENCHMARK_TEMPLATE(BM_Jaro, 32, 1000);
BENCHMARK_TEMPLATE(BM_Jaro, 64, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 8, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 16, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 32, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_Cached, 64, 1000);
#ifdef RAPIDFUZZ_SIMD
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 8, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 16, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 32, 1000);
BENCHMARK_TEMPLATE(BM_Jaro_SIMD, 64, 1000);
#endif
// Long-sequence benchmarks: Args are {string length, second argument}.
// The benchmarks read the first value as the string length.
BENCHMARK(BM_JaroLongSimilarSequence)
->Args({100, 30})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
BENCHMARK(BM_JaroLongNonSimilarSequence)
->Args({100, 30})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
BENCHMARK_MAIN(); rapidfuzz-cpp-3.3.1/bench/bench-lcs.cpp 0000664 0000000 0000000 00000013623 14744034430 0017745 0 ustar 00root root 0000000 0000000 #include
#include
#include
#include
#include
#include
/**
 * Build a random alphanumeric string.
 *
 * @param max_length number of characters to produce; values <= 0 yield an
 *                   empty string. Despite the name, the result always has
 *                   exactly this many characters.
 * @return a string whose characters are drawn uniformly from [0-9A-Za-z]
 *
 * A new engine is seeded from std::random_device on every call.
 */
std::string generate(int max_length)
{
    static const std::string possible_characters =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    std::random_device rd;
    std::mt19937 engine(rd());
    // inclusive index range [0, size - 1]
    std::uniform_int_distribution<> dist(0, static_cast<int>(possible_characters.size() - 1));

    std::string ret;
    if (max_length > 0) {
        ret.reserve(static_cast<size_t>(max_length));
    }
    for (int i = 0; i < max_length; i++) {
        int random_index = dist(engine);
        ret += possible_characters[static_cast<size_t>(random_index)];
    }
    return ret;
}
/**
 * Repeat a string.
 *
 * @param a the string to repeat
 * @param b how many copies of `a` to concatenate (0 yields an empty string)
 * @return `a` concatenated with itself `b` times
 */
template <typename CharT>
std::basic_string<CharT> str_multiply(std::basic_string<CharT> a, unsigned int b)
{
    std::basic_string<CharT> output;
    output.reserve(a.size() * b);
    while (b--)
        output += a;
    return output;
}
/**
 * Benchmark rapidfuzz::lcs_seq_distance on two long strings that differ only
 * in their first and last character ("ab...ba" vs. "bb...bb").
 *
 * state.range(0) is the string length (expected to be >= 2) and
 * state.range(1) is forwarded as the score cutoff.
 * Counter "Rate" is (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_LcsLongSimilarSequence(benchmark::State& state)
{
    const size_t len = static_cast<size_t>(state.range(0));
    const size_t score_cutoff = static_cast<size_t>(state.range(1));
    std::string s1 = std::string("a") + str_multiply(std::string("b"), static_cast<unsigned int>(len - 2)) +
                     std::string("a");
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));
    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::lcs_seq_distance(s1, s2, score_cutoff));
        ++num;
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
/**
 * Benchmark rapidfuzz::lcs_seq_distance on two long strings without any
 * common character ("aa...a" vs. "bb...b").
 *
 * state.range(0) is the string length and state.range(1) is forwarded as
 * the score cutoff.
 * Counter "Rate" is (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_LcsLongNonSimilarSequence(benchmark::State& state)
{
    const size_t len = static_cast<size_t>(state.range(0));
    const size_t score_cutoff = static_cast<size_t>(state.range(1));
    std::string s1 = str_multiply(std::string("a"), static_cast<unsigned int>(len));
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));
    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::lcs_seq_distance(s1, s2, score_cutoff));
        ++num;
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
template
static void BM_LCS(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen));
size_t num = 0;
for (auto _ : state) {
for (size_t j = 0; j < seq2.size(); ++j)
for (size_t i = 0; i < seq1.size(); ++i)
benchmark::DoNotOptimize(rapidfuzz::lcs_seq_distance(seq1[i], seq2[j]));
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
template
static void BM_LCS_Cached(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen));
size_t num = 0;
for (auto _ : state) {
for (const auto& str1 : seq1) {
rapidfuzz::CachedLCSseq scorer(str1);
for (size_t j = 0; j < seq2.size(); ++j)
benchmark::DoNotOptimize(scorer.similarity(seq2[j]));
}
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#ifdef RAPIDFUZZ_SIMD
/**
 * Benchmark the batched rapidfuzz::experimental::MultiLCSseq scorer.
 *
 * 32 * 3 * 4 random strings are inserted into the scorer, which is rebuilt
 * on every iteration, and then compared against 10000 random strings; all
 * strings have length MaxLen. Counter "Rate" is comparisons per second,
 * "InvRate" its inverse.
 *
 * NOTE(review): MultiLCSseq is assumed to be parameterized on the maximum
 * string length and to write size_t scores — confirm against the library.
 */
template <size_t MaxLen>
static void BM_LCS_SIMD(benchmark::State& state)
{
    std::vector<std::string> seq1;
    std::vector<std::string> seq2;
    std::vector<size_t> results(32 * 3 * 4);
    for (int i = 0; i < 32 * 3 * 4; i++)
        seq1.push_back(generate(static_cast<int>(MaxLen)));
    for (int i = 0; i < 10000; i++)
        seq2.push_back(generate(static_cast<int>(MaxLen)));
    size_t num = 0;
    for (auto _ : state) {
        rapidfuzz::experimental::MultiLCSseq<MaxLen> scorer(seq1.size());
        for (const auto& str1 : seq1)
            scorer.insert(str1);
        for (const auto& str2 : seq2)
            scorer.similarity(&results[0], results.size(), str2);
        num += seq1.size() * seq2.size();
    }
    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#endif
// Benchmark registrations.

// Long-sequence benchmarks: Args are {string length, score cutoff}.
BENCHMARK(BM_LcsLongSimilarSequence)
->Args({100, 30})
->Args({500, 100})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
BENCHMARK(BM_LcsLongNonSimilarSequence)
->Args({100, 30})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
// Random-string benchmarks: the template argument is the string length.
BENCHMARK_TEMPLATE(BM_LCS, 8);
BENCHMARK_TEMPLATE(BM_LCS, 16);
BENCHMARK_TEMPLATE(BM_LCS, 32);
BENCHMARK_TEMPLATE(BM_LCS, 64);
BENCHMARK_TEMPLATE(BM_LCS_Cached, 8);
BENCHMARK_TEMPLATE(BM_LCS_Cached, 16);
BENCHMARK_TEMPLATE(BM_LCS_Cached, 32);
BENCHMARK_TEMPLATE(BM_LCS_Cached, 64);
#ifdef RAPIDFUZZ_SIMD
BENCHMARK_TEMPLATE(BM_LCS_SIMD, 8);
BENCHMARK_TEMPLATE(BM_LCS_SIMD, 16);
BENCHMARK_TEMPLATE(BM_LCS_SIMD, 32);
BENCHMARK_TEMPLATE(BM_LCS_SIMD, 64);
#endif
BENCHMARK_MAIN(); rapidfuzz-cpp-3.3.1/bench/bench-levenshtein.cpp 0000664 0000000 0000000 00000016277 14744034430 0021520 0 ustar 00root root 0000000 0000000 #include
#include
#include
#include
#include
/**
 * Build a random alphanumeric string.
 *
 * @param max_length number of characters to produce; values <= 0 yield an
 *                   empty string. Despite the name, the result always has
 *                   exactly this many characters.
 * @return a string whose characters are drawn uniformly from [0-9A-Za-z]
 *
 * A new engine is seeded from std::random_device on every call.
 */
std::string generate(int max_length)
{
    static const std::string possible_characters =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    std::random_device rd;
    std::mt19937 engine(rd());
    // inclusive index range [0, size - 1]
    std::uniform_int_distribution<> dist(0, static_cast<int>(possible_characters.size() - 1));

    std::string ret;
    if (max_length > 0) {
        ret.reserve(static_cast<size_t>(max_length));
    }
    for (int i = 0; i < max_length; i++) {
        int random_index = dist(engine);
        ret += possible_characters[static_cast<size_t>(random_index)];
    }
    return ret;
}
/**
 * @brief Repeat a string a fixed number of times.
 *
 * @tparam T character type of the string
 * @param a string to repeat
 * @param b number of repetitions; 0 yields an empty string
 * @return a concatenated with itself b times
 */
template <typename T>
std::basic_string<T> str_multiply(std::basic_string<T> a, unsigned int b)
{
    std::basic_string<T> output;
    /* the final size is known up front, so allocate once */
    output.reserve(a.size() * b);
    for (unsigned int i = 0; i < b; ++i)
        output += a;

    return output;
}
/* Benchmark: Levenshtein distance of a short string compared with itself. */
static void BM_LevWeightedDist1(benchmark::State& state)
{
    const std::string text = "aaaaa aaaaa";

    for (auto _ : state)
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_distance(text, text));

    state.SetLabel("Similar Strings");
}
/* Benchmark: Levenshtein distance of two short strings of equal length that differ in every non-space character. */
static void BM_LevWeightedDist2(benchmark::State& state)
{
    const std::string lhs = "aaaaa aaaaa";
    const std::string rhs = "bbbbb bbbbb";

    for (auto _ : state)
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_distance(lhs, rhs));

    state.SetLabel("Different Strings");
}
/* Benchmark: normalized Levenshtein distance of a short string compared with itself. */
static void BM_LevNormWeightedDist1(benchmark::State& state)
{
    const std::string text = "aaaaa aaaaa";

    for (auto _ : state)
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_normalized_distance(text, text));

    state.SetLabel("Similar Strings");
}
/* Benchmark: normalized Levenshtein distance of two short strings that differ in every non-space character. */
static void BM_LevNormWeightedDist2(benchmark::State& state)
{
    const std::string lhs = "aaaaa aaaaa";
    const std::string rhs = "bbbbb bbbbb";

    for (auto _ : state)
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_normalized_distance(lhs, rhs));

    state.SetLabel("Different Strings");
}
/**
 * Benchmark: uniform-weight Levenshtein distance between two long strings that
 * differ only in their first and last character.
 *
 * state.range(0) is the string length, state.range(1) the score cutoff passed
 * to levenshtein_distance. The "Rate" counter reports processed characters
 * (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_LevLongSimilarSequence(benchmark::State& state)
{
    size_t len = static_cast<size_t>(state.range(0));
    size_t score_cutoff = static_cast<size_t>(state.range(1));

    /* the two 'a' sentinels take two of the len characters; avoid wrapping around for len < 2 */
    size_t inner_len = (len >= 2) ? len - 2 : 0;
    std::string s1 = std::string("a") + str_multiply(std::string("b"), static_cast<unsigned int>(inner_len)) +
                     std::string("a");
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));

    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_distance(s1, s2, {1, 1, 1}, score_cutoff));
        ++num;
    }

    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
/**
 * Benchmark: uniform-weight Levenshtein distance between two long strings that
 * share no character ("aaa..." vs "bbb...").
 *
 * state.range(0) is the string length, state.range(1) the score cutoff passed
 * to levenshtein_distance. The "Rate" counter reports processed characters
 * (iterations * length) per second, "InvRate" its inverse.
 */
static void BM_LevLongNonSimilarSequence(benchmark::State& state)
{
    size_t len = static_cast<size_t>(state.range(0));
    size_t score_cutoff = static_cast<size_t>(state.range(1));

    std::string s1 = str_multiply(std::string("a"), static_cast<unsigned int>(len));
    std::string s2 = str_multiply(std::string("b"), static_cast<unsigned int>(len));

    size_t num = 0;
    for (auto _ : state) {
        benchmark::DoNotOptimize(rapidfuzz::levenshtein_distance(s1, s2, {1, 1, 1}, score_cutoff));
        ++num;
    }

    state.counters["Rate"] = benchmark::Counter(static_cast<double>(num * len), benchmark::Counter::kIsRate);
    state.counters["InvRate"] = benchmark::Counter(static_cast<double>(num * len),
                                                   benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
template
static void BM_Levenshtein(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen));
size_t num = 0;
for (auto _ : state) {
for (size_t j = 0; j < seq2.size(); ++j)
for (size_t i = 0; i < seq1.size(); ++i)
benchmark::DoNotOptimize(rapidfuzz::levenshtein_distance(seq1[i], seq2[j]));
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
template
static void BM_Levenshtein_Cached(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
for (int i = 0; i < 256; i++)
seq1.push_back(generate(MaxLen));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen));
size_t num = 0;
for (auto _ : state) {
for (const auto& str1 : seq1) {
rapidfuzz::CachedLevenshtein scorer(str1);
for (size_t j = 0; j < seq2.size(); ++j)
benchmark::DoNotOptimize(scorer.similarity(seq2[j]));
}
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#ifdef RAPIDFUZZ_SIMD
template
static void BM_Levenshtein_SIMD(benchmark::State& state)
{
std::vector seq1;
std::vector seq2;
std::vector results(64);
for (int i = 0; i < 64; i++)
seq1.push_back(generate(MaxLen));
for (int i = 0; i < 10000; i++)
seq2.push_back(generate(MaxLen));
size_t num = 0;
for (auto _ : state) {
rapidfuzz::experimental::MultiLevenshtein scorer(seq1.size());
for (const auto& str1 : seq1)
scorer.insert(str1);
for (const auto& str2 : seq2)
scorer.similarity(&results[0], results.size(), str2);
num += seq1.size() * seq2.size();
}
state.counters["Rate"] = benchmark::Counter(static_cast(num), benchmark::Counter::kIsRate);
state.counters["InvRate"] = benchmark::Counter(static_cast(num),
benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
#endif
// Register the long-sequence benchmarks. Each Args pair is {string length, score cutoff},
// read by the benchmark bodies through state.range(0) and state.range(1).
BENCHMARK(BM_LevLongSimilarSequence)
->Args({100, 30})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
BENCHMARK(BM_LevLongNonSimilarSequence)
->Args({100, 30})
->Args({500, 30})
->Args({5000, 30})
->Args({10000, 30})
->Args({20000, 30})
->Args({50000, 30});
// Fixed-input benchmarks on short literal strings.
BENCHMARK(BM_LevWeightedDist1);
BENCHMARK(BM_LevWeightedDist2);
BENCHMARK(BM_LevNormWeightedDist1);
BENCHMARK(BM_LevNormWeightedDist2);
// Random-input benchmarks, instantiated for generated string lengths 8, 16, 32 and 64.
BENCHMARK_TEMPLATE(BM_Levenshtein, 8);
BENCHMARK_TEMPLATE(BM_Levenshtein, 16);
BENCHMARK_TEMPLATE(BM_Levenshtein, 32);
BENCHMARK_TEMPLATE(BM_Levenshtein, 64);
BENCHMARK_TEMPLATE(BM_Levenshtein_Cached, 8);
BENCHMARK_TEMPLATE(BM_Levenshtein_Cached, 16);
BENCHMARK_TEMPLATE(BM_Levenshtein_Cached, 32);
BENCHMARK_TEMPLATE(BM_Levenshtein_Cached, 64);
// The SIMD variants are only registered when RAPIDFUZZ_SIMD is defined.
#ifdef RAPIDFUZZ_SIMD
BENCHMARK_TEMPLATE(BM_Levenshtein_SIMD, 8);
BENCHMARK_TEMPLATE(BM_Levenshtein_SIMD, 16);
BENCHMARK_TEMPLATE(BM_Levenshtein_SIMD, 32);
BENCHMARK_TEMPLATE(BM_Levenshtein_SIMD, 64);
#endif
BENCHMARK_MAIN(); rapidfuzz-cpp-3.3.1/cmake/ 0000775 0000000 0000000 00000000000 14744034430 0015377 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/cmake/rapidfuzzConfig.cmake.in 0000664 0000000 0000000 00000000401 14744034430 0022145 0 ustar 00root root 0000000 0000000 @PACKAGE_INIT@
# Avoid repeatedly including the targets
if(NOT TARGET rapidfuzz::rapidfuzz)
# Provide path for scripts
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
# Quoted so that an install prefix containing spaces or semicolons stays a single argument.
include("${CMAKE_CURRENT_LIST_DIR}/rapidfuzzTargets.cmake")
endif() rapidfuzz-cpp-3.3.1/docs/ 0000775 0000000 0000000 00000000000 14744034430 0015247 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/docs/literature/ 0000775 0000000 0000000 00000000000 14744034430 0017427 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/docs/literature/hyrro_2002.bib 0000664 0000000 0000000 00000000307 14744034430 0021713 0 ustar 00root root 0000000 0000000 @article{hyrro_2002,
author = {Hyyr{\"o}, Heikki},
year = {2002},
month = {10},
pages = {},
title = {Explaining and Extending the Bit-parallel Approximate String Matching Algorithm of Myers}
}
rapidfuzz-cpp-3.3.1/docs/literature/hyrro_2004.bib 0000664 0000000 0000000 00000000375 14744034430 0021722 0 ustar 00root root 0000000 0000000 @article{hyrro_2004,
author = {Hyyr{\"o}, Heikki},
year = {2004},
month = {08},
pages = {},
title = {Bit-Parallel LCS-length Computation Revisited},
journal = {Proc. 15th Australasian Workshop on Combinatorial Algorithms (AWOCA 2004)}
}
rapidfuzz-cpp-3.3.1/docs/literature/hyrro_lcs_2004.bib 0000664 0000000 0000000 00000000370 14744034430 0022556 0 ustar 00root root 0000000 0000000 @article{hyrro_lcs_2004,
author = {Hyyr{\"o}, Heikki},
year = {2004},
month = {08},
pages = {},
title = {Bit-Parallel LCS-length Computation Revisited},
journal = {Proc. 15th Australasian Workshop on Combinatorial Algorithms (AWOCA 2004)}
} rapidfuzz-cpp-3.3.1/docs/literature/myers_1999.bib 0000664 0000000 0000000 00000001044 14744034430 0021736 0 ustar 00root root 0000000 0000000 @article{myers_1999,
author = {Myers, Gene},
title = {A Fast Bit-Vector Algorithm for Approximate String Matching Based on Dynamic Programming},
year = {1999},
issue_date = {May 1999},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {46},
number = {3},
issn = {0004-5411},
url = {https://doi.org/10.1145/316542.316550},
doi = {10.1145/316542.316550},
journal = {J. ACM},
month = may,
pages = {395–415},
numpages = {21},
keywords = {approximate string search, sequence comparison, bit-parallelism}
}
rapidfuzz-cpp-3.3.1/docs/literature/wagner_fischer_1974.bib 0000664 0000000 0000000 00000002150 14744034430 0023555 0 ustar 00root root 0000000 0000000 @article{wagner_fischer_1974,
author = {Wagner, Robert A. and Fischer, Michael J.},
title = {The String-to-String Correction Problem},
year = {1974},
issue_date = {Jan. 1974},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {21},
number = {1},
issn = {0004-5411},
url = {https://doi.org/10.1145/321796.321811},
doi = {10.1145/321796.321811},
abstract = {The string-to-string correction problem is to determine the distance between two strings as measured by the minimum cost sequence of “edit operations” needed to change the one string into the other. The edit operations investigated allow changing one symbol of a string into another single symbol, deleting one symbol from a string, or inserting a single symbol into a string. An algorithm is presented which solves this problem in time proportional to the product of the lengths of the two strings. Possible applications are to the problems of automatic spelling correction and determining the longest subsequence of characters common to two strings.},
journal = {J. ACM},
month = jan,
pages = {168–173},
numpages = {6}
}
rapidfuzz-cpp-3.3.1/examples/ 0000775 0000000 0000000 00000000000 14744034430 0016135 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/examples/cmake_export/ 0000775 0000000 0000000 00000000000 14744034430 0020616 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/examples/cmake_export/CMakeLists.txt 0000664 0000000 0000000 00000002622 14744034430 0023360 0 ustar 00root root 0000000 0000000 cmake_minimum_required(VERSION 3.5)
project(foo LANGUAGES CXX VERSION 0.0.1)

# The example library publicly depends on RapidFuzz (foo_lib.hpp includes
# rapidfuzz.hpp), so RapidFuzz has to be installed alongside it.
set(RAPIDFUZZ_INSTALL ON CACHE INTERNAL "")

# RapidFuzz lives two directories above this example. The paths are anchored at
# CMAKE_CURRENT_SOURCE_DIR so they stay correct when this example is itself
# added as a subdirectory of another build (CMAKE_SOURCE_DIR would then point
# at that outer project). An explicit binary directory is required because the
# source directory is outside this project's tree.
add_subdirectory(
  "${CMAKE_CURRENT_SOURCE_DIR}/../.."
  "${CMAKE_CURRENT_SOURCE_DIR}/../../build"
)

add_library(foo foo_lib.cc)
add_library(foo::foo ALIAS foo)
# PUBLIC: consumers of foo need RapidFuzz's usage requirements as well.
target_link_libraries(foo PUBLIC rapidfuzz)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

set(FOO_CMAKE_CONFIG_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/foo")

# Install the library and export its target under the foo:: namespace.
install(TARGETS foo EXPORT fooTargs DESTINATION "${CMAKE_INSTALL_LIBDIR}")
install(EXPORT fooTargs NAMESPACE foo:: DESTINATION "${FOO_CMAKE_CONFIG_DESTINATION}")

# Generate fooConfig.cmake and fooConfigVersion.cmake in the build tree.
configure_package_config_file(
  "${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Config.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
  INSTALL_DESTINATION "${FOO_CMAKE_CONFIG_DESTINATION}"
)
write_basic_package_version_file(
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
  COMPATIBILITY SameMajorVersion
)

install(
  FILES
    "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
  DESTINATION
    "${FOO_CMAKE_CONFIG_DESTINATION}"
)
install(FILES foo_lib.hpp DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
rapidfuzz-cpp-3.3.1/examples/cmake_export/fooConfig.cmake.in 0000664 0000000 0000000 00000000422 14744034430 0024134 0 ustar 00root root 0000000 0000000 @PACKAGE_INIT@
# Avoid repeatedly including the targets
if(NOT TARGET foo::foo)
# Inside a package config file, find_dependency() is used instead of
# find_package(): it forwards the QUIET/REQUIRED state of the outer
# find_package(foo ...) call and reports a missing rapidfuzz as the reason
# foo could not be found.
include(CMakeFindDependencyMacro)
find_dependency(rapidfuzz)
# Provide path for scripts
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
include("${CMAKE_CURRENT_LIST_DIR}/fooTargs.cmake")
endif() rapidfuzz-cpp-3.3.1/examples/cmake_export/foo_lib.cc 0000664 0000000 0000000 00000000237 14744034430 0022540 0 ustar 00root root 0000000 0000000 #include "foo_lib.hpp"
double fooFunc() {
std::string_view a("aaaa"), b("abaa");
FooType cache(a.begin(), a.end());
return cache.similarity(b);
}
rapidfuzz-cpp-3.3.1/examples/cmake_export/foo_lib.hpp 0000664 0000000 0000000 00000000145 14744034430 0022740 0 ustar 00root root 0000000 0000000 #include
/* Scorer type used by the example library: RapidFuzz's cached ratio scorer over `char` sequences. */
using FooType = rapidfuzz::fuzz::CachedRatio<char>;

/* Returns the FooType similarity of two fixed example strings (defined in foo_lib.cc). */
double fooFunc();
rapidfuzz-cpp-3.3.1/examples/cmake_export/indirect_app/ 0000775 0000000 0000000 00000000000 14744034430 0023257 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/examples/cmake_export/indirect_app/CMakeLists.txt 0000664 0000000 0000000 00000000264 14744034430 0026021 0 ustar 00root root 0000000 0000000 cmake_minimum_required(VERSION 3.5)
project(fooapp LANGUAGES CXX VERSION 0.0.1)

# foo's package config is expected to pull in its own dependencies.
find_package(foo REQUIRED)

add_executable(fooapp foo_app.cc)
# PRIVATE: an executable has no consumers, and the explicit keyword avoids the legacy plain signature.
target_link_libraries(fooapp PRIVATE foo::foo)
rapidfuzz-cpp-3.3.1/examples/cmake_export/indirect_app/foo_app.cc 0000664 0000000 0000000 00000000154 14744034430 0025211 0 ustar 00root root 0000000 0000000 #include
#include
int main() {
std::cout << fooFunc() << '\n';
return 0;
} rapidfuzz-cpp-3.3.1/examples/cmake_installed/ 0000775 0000000 0000000 00000000000 14744034430 0021254 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/examples/cmake_installed/CMakeLists.txt 0000664 0000000 0000000 00000000257 14744034430 0024020 0 ustar 00root root 0000000 0000000 cmake_minimum_required(VERSION 3.8)
# Example consumer project that builds against an installed rapidfuzz package.
project(cmake_installed CXX)
# REQUIRED: configuration fails if no rapidfuzz package config can be found.
find_package(rapidfuzz REQUIRED)
# The example executable is built from main.cpp.
add_executable(foo main.cpp)
target_link_libraries(foo rapidfuzz::rapidfuzz) rapidfuzz-cpp-3.3.1/examples/cmake_installed/main.cpp 0000664 0000000 0000000 00000000307 14744034430 0022704 0 ustar 00root root 0000000 0000000 #include
#include
#include
int main()
{
std::string a = "aaaa";
std::string b = "abab";
std::cout << rapidfuzz::fuzz::ratio(a, b) << std::endl;
} rapidfuzz-cpp-3.3.1/extras/ 0000775 0000000 0000000 00000000000 14744034430 0015625 5 ustar 00root root 0000000 0000000 rapidfuzz-cpp-3.3.1/extras/rapidfuzz_amalgamated.hpp 0000664 0000000 0000000 00001360322 14744034430 0022700 0 ustar 00root root 0000000 0000000 // Licensed under the MIT License .
// SPDX-License-Identifier: MIT
// RapidFuzz v1.0.2
// Generated: 2024-12-25 11:44:52.213162
// ----------------------------------------------------------
// This file is an amalgamation of multiple different files.
// You probably shouldn't edit it directly.
// ----------------------------------------------------------
#ifndef RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED
#define RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED
#include
#include
#include
#include
#include
#include
#include
#include
namespace rapidfuzz {
namespace detail {
/* hashmap for integers which can only grow, but can't remove elements */
/**
 * Open-addressing hash map with a power-of-two table size.
 *
 * A slot whose value compares equal to value_type() is treated as empty, so a
 * default-constructed value cannot be told apart from "key not present".
 * The table is allocated lazily on the first operator[] call; until then
 * m_map is nullptr and mask is -1.
 *
 * @tparam T_Key   key type, must be convertible to size_t
 * @tparam T_Entry mapped type, must be default constructible and comparable
 */
template <typename T_Key, typename T_Entry>
struct GrowingHashmap {
    using key_type = T_Key;
    using value_type = T_Entry;
    using size_type = unsigned int;

private:
    /* initial number of slots */
    static constexpr size_type min_size = 8;
    struct MapElem {
        key_type key;
        value_type value = value_type();
    };

    int used;       /* number of stored entries */
    int fill;       /* number of occupied slots, compared against the 2/3 load limit */
    int mask;       /* table size - 1, or -1 while no table is allocated */
    MapElem* m_map; /* slot array, nullptr until first insertion */

public:
    GrowingHashmap() : used(0), fill(0), mask(-1), m_map(nullptr)
    {}
    ~GrowingHashmap()
    {
        delete[] m_map;
    }

    GrowingHashmap(const GrowingHashmap& other)
        : used(other.used), fill(other.fill), mask(other.mask), m_map(nullptr)
    {
        /* A map that never allocated has mask == -1. Keep m_map null in that
         * case so get()/operator[] still take their "no table yet" path
         * instead of probing a zero-length array. */
        if (other.m_map == nullptr) return;

        size_t size = static_cast<size_t>(mask + 1);
        m_map = new MapElem[size];
        std::copy(other.m_map, other.m_map + size, m_map);
    }

    GrowingHashmap(GrowingHashmap&& other) noexcept : GrowingHashmap()
    {
        swap(*this, other);
    }

    /* copy-and-swap: handles both copy and move assignment */
    GrowingHashmap& operator=(GrowingHashmap other)
    {
        swap(*this, other);
        return *this;
    }

    friend void swap(GrowingHashmap& first, GrowingHashmap& second) noexcept
    {
        std::swap(first.used, second.used);
        std::swap(first.fill, second.fill);
        std::swap(first.mask, second.mask);
        std::swap(first.m_map, second.m_map);
    }

    /** number of stored entries */
    size_type size() const
    {
        return static_cast<size_type>(used);
    }
    /** number of slots in the table (0 while nothing is allocated) */
    size_type capacity() const
    {
        return static_cast<size_type>(mask + 1);
    }
    bool empty() const
    {
        return used == 0;
    }

    /** value stored for key, or value_type() when the key is absent */
    value_type get(key_type key) const noexcept
    {
        if (m_map == nullptr) return value_type();

        return m_map[lookup(key)].value;
    }

    /**
     * Reference to the value stored for key, inserting the key when absent.
     * May reallocate the table, which invalidates earlier references.
     */
    value_type& operator[](key_type key) noexcept
    {
        if (m_map == nullptr) allocate();

        size_t i = lookup(key);

        if (m_map[i].value == value_type()) {
            /* resize when 2/3 full */
            if (++fill * 3 >= (mask + 1) * 2) {
                grow((used + 1) * 2);
                i = lookup(key);
            }

            used++;
        }

        m_map[i].key = key;
        return m_map[i].value;
    }

private:
    /* create the initial table of min_size slots */
    void allocate()
    {
        mask = static_cast<int>(min_size) - 1;
        m_map = new MapElem[min_size];
    }

    /**
     * lookup key inside the hashmap using a similar collision resolution
     * strategy to CPython and Ruby
     *
     * Returns the index of the slot holding key, or of the first empty slot
     * on its probe sequence. Requires an allocated table.
     */
    size_t lookup(key_type key) const
    {
        size_t hash = static_cast<size_t>(key);
        size_t i = hash & static_cast<size_t>(mask);

        if (m_map[i].value == value_type() || m_map[i].key == key) return i;

        size_t perturb = hash;
        while (true) {
            i = (i * 5 + perturb + 1) & static_cast<size_t>(mask);
            if (m_map[i].value == value_type() || m_map[i].key == key) return i;

            perturb >>= 5;
        }
    }

    /* reallocate to the first power-of-two size above minUsed and reinsert all entries */
    void grow(int minUsed)
    {
        int newSize = mask + 1;
        while (newSize <= minUsed)
            newSize <<= 1;

        MapElem* oldMap = m_map;
        m_map = new MapElem[static_cast<size_t>(newSize)];

        fill = used;
        mask = newSize - 1;

        /* `used` doubles as the countdown of entries still to move; restored from `fill` below */
        for (int i = 0; used > 0; i++)
            if (oldMap[i].value != value_type()) {
                size_t j = lookup(oldMap[i].key);

                m_map[j].key = oldMap[i].key;
                m_map[j].value = oldMap[i].value;
                used--;
            }

        used = fill;
        delete[] oldMap;
    }
};
/*
 * Map that keeps values for keys in the range [0, 255] in the fixed array
 * m_extendedAscii and forwards every other key to the GrowingHashmap m_map.
 * NOTE(review): the template parameter list and the targets of the
 * static_cast expressions are missing in this copy of the file; restore them
 * from the upstream header before compiling.
 */
template
struct HybridGrowingHashmap {
using key_type = T_Key;
using value_type = T_Entry;
/* start with every array slot set to value_type() */
HybridGrowingHashmap()
{
m_extendedAscii.fill(value_type());
}
/* lookup for plain char keys: always served from the array, no range check */
value_type get(char key) const noexcept
{
/** treat char as value between 0 and 127 for performance reasons */
return m_extendedAscii[static_cast(key)];
}
/* lookup for any other key type: array for 0..255, hashmap otherwise */
template
value_type get(CharT key) const noexcept
{
if (key >= 0 && key <= 255)
return m_extendedAscii[static_cast(key)];
else
return m_map.get(static_cast(key));
}
/* mutable access for plain char keys: always served from the array */
value_type& operator[](char key) noexcept
{
/** treat char as value between 0 and 127 for performance reasons */
return m_extendedAscii[static_cast(key)];
}
/* mutable access for any other key type: array for 0..255, hashmap otherwise */
template
value_type& operator[](CharT key)
{
if (key >= 0 && key <= 255)
return m_extendedAscii[static_cast(key)];
else
return m_map[static_cast(key)];
}
private:
/* storage for keys outside 0..255 */
GrowingHashmap m_map;
/* storage for keys 0..255 */
std::array m_extendedAscii;
};
} // namespace detail
} // namespace rapidfuzz
#include
#include
#include
#include
#include
namespace rapidfuzz {
namespace detail {
/*
 * Non-owning view of one row of a bit matrix: a pointer to the first word of
 * the row plus the number of words (columns) in it.
 * NOTE(review): the template parameter list and the std::conditional
 * arguments are missing in this copy of the file; restore them from the
 * upstream header before compiling.
 */
template
struct BitMatrixView {
using value_type = T;
using size_type = size_t;
using pointer = typename std::conditional::type;
using reference = typename std::conditional::type;
/* wrap `cols` words starting at `vector`; the view does not take ownership */
BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols)
{}
/* access the word at index col; bounds are only checked by assert */
reference operator[](size_type col) noexcept
{
assert(col < m_cols);
return m_vector[col];
}
/* number of words in the row */
size_type size() const noexcept
{
return m_cols;
}
private:
pointer m_vector;
size_type m_cols;
};
/*
 * Owning rows x cols matrix of T stored in one contiguous row-major array.
 * NOTE(review): the template parameter list and the BitMatrixView template
 * arguments are missing in this copy of the file; restore them from the
 * upstream header before compiling.
 */
template
struct BitMatrix {
using value_type = T;
/* empty matrix without storage */
BitMatrix() : m_rows(0), m_cols(0), m_matrix(nullptr)
{}
/* rows x cols matrix with every element set to val; no allocation when either dimension is 0 */
BitMatrix(size_t rows, size_t cols, T val) : m_rows(rows), m_cols(cols), m_matrix(nullptr)
{
if (m_rows && m_cols) m_matrix = new T[m_rows * m_cols];
std::fill_n(m_matrix, m_rows * m_cols, val);
}
/* deep copy; no allocation when either dimension is 0 */
BitMatrix(const BitMatrix& other) : m_rows(other.m_rows), m_cols(other.m_cols), m_matrix(nullptr)
{
if (m_rows && m_cols) m_matrix = new T[m_rows * m_cols];
std::copy(other.m_matrix, other.m_matrix + m_rows * m_cols, m_matrix);
}
/* move: start empty, then exchange contents with other */
BitMatrix(BitMatrix&& other) noexcept : m_rows(0), m_cols(0), m_matrix(nullptr)
{
other.swap(*this);
}
/* move assignment exchanges contents; the previous storage is released by other's destructor */
BitMatrix& operator=(BitMatrix&& other) noexcept
{
other.swap(*this);
return *this;
}
/* copy assignment via copy-and-swap */
BitMatrix& operator=(const BitMatrix& other)
{
BitMatrix temp = other;
temp.swap(*this);
return *this;
}
/* exchange dimensions and storage with rhs */
void swap(BitMatrix& rhs) noexcept
{
using std::swap;
swap(m_rows, rhs.m_rows);
swap(m_cols, rhs.m_cols);
swap(m_matrix, rhs.m_matrix);
}
~BitMatrix()
{
delete[] m_matrix;
}
/* view of the given row; the row index is only checked by assert */
BitMatrixView operator[](size_t row) noexcept
{
assert(row < m_rows);
return {&m_matrix[row * m_cols], m_cols};
}
/* view of the given row for const matrices; the row index is only checked by assert */
BitMatrixView operator[](size_t row) const noexcept
{
assert(row < m_rows);
return {&m_matrix[row * m_cols], m_cols};
}
size_t rows() const noexcept
{
return m_rows;
}
size_t cols() const noexcept
{
return m_cols;
}
private:
size_t m_rows;
size_t m_cols;
/* row-major storage of m_rows * m_cols elements, nullptr when empty */
T* m_matrix;
};
/*
 * BitMatrix combined with one signed column offset per row. test_bit()
 * translates a logical column into the stored column using that offset.
 * NOTE(review): the template parameter list, the static_cast targets and the
 * template arguments of BitMatrixView, BitMatrix and std::vector are missing
 * in this copy of the file; restore them from the upstream header before
 * compiling.
 */
template
struct ShiftedBitMatrix {
using value_type = T;
ShiftedBitMatrix()
{}
/* rows x cols matrix filled with val and one value-initialized offset per row */
ShiftedBitMatrix(size_t rows, size_t cols, T val) : m_matrix(rows, cols, val), m_offsets(rows)
{}
ShiftedBitMatrix(const ShiftedBitMatrix& other) : m_matrix(other.m_matrix), m_offsets(other.m_offsets)
{}
/* move: members are default constructed, then exchanged with other */
ShiftedBitMatrix(ShiftedBitMatrix&& other) noexcept
{
other.swap(*this);
}
ShiftedBitMatrix& operator=(ShiftedBitMatrix&& other) noexcept
{
other.swap(*this);
return *this;
}
/* copy assignment via copy-and-swap */
ShiftedBitMatrix& operator=(const ShiftedBitMatrix& other)
{
ShiftedBitMatrix temp = other;
temp.swap(*this);
return *this;
}
void swap(ShiftedBitMatrix& rhs) noexcept
{
using std::swap;
swap(m_matrix, rhs.m_matrix);
swap(m_offsets, rhs.m_offsets);
}
/*
 * Test the bit at logical position (row, col).
 * A negative row offset moves the column to the right by -offset, a positive
 * one moves it to the left by offset. Columns smaller than a positive offset
 * are not stored and yield default_.
 */
bool test_bit(size_t row, size_t col, bool default_ = false) const noexcept
{
ptrdiff_t offset = m_offsets[row];
if (offset < 0) {
col += static_cast(-offset);
}
else if (col >= static_cast(offset)) {
col -= static_cast(offset);
}
/* bit on the left of the band */
else {
return default_;
}
/* locate the word holding the bit and the bit's mask within that word */
size_t word_size = sizeof(value_type) * 8;
size_t col_word = col / word_size;
value_type col_mask = value_type(1) << (col % word_size);
return bool(m_matrix[row][col_word] & col_mask);
}
/* raw (unshifted) view of a stored row */
BitMatrixView operator[](size_t row) noexcept
{
return m_matrix[row];
}
/* raw (unshifted) view of a stored row for const matrices */
BitMatrixView operator[](size_t row) const noexcept
{
return m_matrix[row];
}
/* set the column offset used by test_bit for the given row */
void set_offset(size_t row, ptrdiff_t offset)
{
m_offsets[row] = offset;
}
private:
BitMatrix m_matrix;
/* one column offset per row */
std::vector m_offsets;
};
} // namespace detail
} // namespace rapidfuzz
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
 * Language-level feature macros. The standard version is taken from
 * _MSVC_LANG when that macro is defined and from __cplusplus otherwise.
 *
 * C++17 (201703L) or newer: deduction guides are enabled and
 * RAPIDFUZZ_IF_CONSTEXPR expands to `if constexpr`; otherwise it expands to a
 * plain `if`.
 */
#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
# define RAPIDFUZZ_DEDUCTION_GUIDES
# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1
# define RAPIDFUZZ_IF_CONSTEXPR if constexpr
#else
# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0
# define RAPIDFUZZ_IF_CONSTEXPR if
#endif
/* C++14 (201402L) or newer: RAPIDFUZZ_CONSTEXPR_CXX14 expands to `constexpr`, otherwise to nothing. */
#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L)
# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr
#else
# define RAPIDFUZZ_CONSTEXPR_CXX14
#endif
#include
#include
#include
namespace rapidfuzz {
/**
 * @brief Pair of a prefix length and a suffix length.
 * NOTE(review): presumably the lengths of the common prefix/suffix of two
 * strings; the users of this struct are not visible here, so confirm.
 */
struct StringAffix {
size_t prefix_len; /**< length of the prefix */
size_t suffix_len; /**< length of the suffix */
};
/**
 * @brief Costs of the three Levenshtein edit operations.
 *
 * The members are initialized in declaration order, so an initializer such as
 * {1, 1, 1} sets insert, delete and replace cost in that order.
 */
struct LevenshteinWeightTable {
size_t insert_cost;  /**< cost of an insertion */
size_t delete_cost;  /**< cost of a deletion */
size_t replace_cost; /**< cost of a replacement */
};
/**
 * @brief Edit operation types used by the Levenshtein distance
 */
enum class EditType {
None = 0, /**< No Operation required */
Replace = 1, /**< Replace a character in a string by another character */
Insert = 2, /**< Insert a character into a string */
Delete = 3 /**< Delete a character from a string */
};
/**
 * @brief Edit operations used by the Levenshtein distance
 *
 * This represents an edit operation of type type which is applied to
 * the source string
 *
 * Replace: replace character at src_pos with character at dest_pos
 * Insert: insert character from dest_pos at src_pos
 * Delete: delete character at src_pos
 */
struct EditOp {
EditType type; /**< type of the edit operation */
size_t src_pos; /**< index into the source string */
size_t dest_pos; /**< index into the destination string */
/** Default: EditType::None at position 0 in both strings. */
EditOp() : type(EditType::None), src_pos(0), dest_pos(0)
{}
/** Construct an operation from its type and both positions. */
EditOp(EditType type_, size_t src_pos_, size_t dest_pos_)
: type(type_), src_pos(src_pos_), dest_pos(dest_pos_)
{}
};
/** Two edit operations are equal when type, source position and destination position all match. */
inline bool operator==(EditOp a, EditOp b)
{
    if (a.type != b.type) return false;
    if (a.src_pos != b.src_pos) return false;
    return a.dest_pos == b.dest_pos;
}
/** Negation of operator== for edit operations. */
inline bool operator!=(EditOp a, EditOp b)
{
    const bool equal = (a == b);
    return !equal;
}
/**
 * @brief Edit operations used by the Levenshtein distance
 *
 * This represents an edit operation of type type which is applied to
 * a range of the source string
 *
 * None: s1[src_begin:src_end] == s2[dest_begin:dest_end]
 * Replace: s1[src_begin:src_end] should be replaced by s2[dest_begin:dest_end]
 * Insert: s2[dest_begin:dest_end] should be inserted at s1[src_begin:src_begin].
 * Note that src_begin==src_end in this case.
 * Delete: s1[src_begin:src_end] should be deleted.
 * Note that dest_begin==dest_end in this case.
 */
struct Opcode {
EditType type; /**< type of the edit operation */
size_t src_begin; /**< index into the source string */
size_t src_end; /**< index into the source string */
size_t dest_begin; /**< index into the destination string */
size_t dest_end; /**< index into the destination string */
/** Default: EditType::None with all four indices set to 0. */
Opcode() : type(EditType::None), src_begin(0), src_end(0), dest_begin(0), dest_end(0)
{}
/** Construct an opcode from its type and both index ranges. */
Opcode(EditType type_, size_t src_begin_, size_t src_end_, size_t dest_begin_, size_t dest_end_)
: type(type_), src_begin(src_begin_), src_end(src_end_), dest_begin(dest_begin_), dest_end(dest_end_)
{}
};
/** Two opcodes are equal when their type and both index ranges match. */
inline bool operator==(Opcode a, Opcode b)
{
    if (a.type != b.type) return false;

    const bool same_src = (a.src_begin == b.src_begin) && (a.src_end == b.src_end);
    const bool same_dest = (a.dest_begin == b.dest_begin) && (a.dest_end == b.dest_end);
    return same_src && same_dest;
}
/** Negation of operator== for opcodes. */
inline bool operator!=(Opcode a, Opcode b)
{
    const bool equal = (a == b);
    return !equal;
}
namespace detail {
/**
 * @brief Copy the elements vec[start], vec[start + step], ... below stop into a new container.
 *
 * Negative start/stop count from the end of the container; values outside
 * the container are clamped to its bounds.
 *
 * @tparam Vec container providing size(), operator[], reserve() and push_back()
 * @param vec   container to slice
 * @param start first index of the slice
 * @param stop  index one past the last candidate element
 * @param step  distance between selected elements, must be positive
 * @return new container holding the selected elements (empty if start >= stop)
 * @throws std::invalid_argument if step is zero or negative
 */
template <typename Vec>
auto vector_slice(const Vec& vec, int start, int stop, int step) -> Vec
{
    Vec new_vec;

    if (step == 0) throw std::invalid_argument("slice step cannot be zero");
    if (step < 0) throw std::invalid_argument("step sizes below 0 lead to an invalid order of editops");

    const int len = static_cast<int>(vec.size());

    if (start < 0)
        start = std::max(start + len, 0);
    else if (start > len)
        start = len;

    if (stop < 0)
        stop = std::max(stop + len, 0);
    else if (stop > len)
        stop = len;

    if (start >= stop) return new_vec;

    int count = (stop - 1 - start) / step + 1;
    new_vec.reserve(static_cast<size_t>(count));

    /* Iterate by element count: k * step never exceeds stop - 1 - start, so
     * the index cannot overflow even for very large steps (a running
     * `i += step` could). */
    for (int k = 0; k < count; k++)
        new_vec.push_back(vec[static_cast<size_t>(start + k * step)]);

    return new_vec;
}
/**
 * @brief Remove the elements vec[start], vec[start + step], ... below stop in place.
 *
 * Negative start/stop count from the end of the container; values outside
 * the container are clamped to its bounds. The remaining elements keep their
 * relative order and the container's capacity is shrunk afterwards.
 *
 * @tparam Vec container providing begin(), size(), operator[], resize() and shrink_to_fit()
 * @param vec   container to modify
 * @param start first index of the slice
 * @param stop  index one past the last candidate element
 * @param step  distance between removed elements, must be positive
 * @throws std::invalid_argument if step is zero or negative
 */
template <typename Vec>
void vector_remove_slice(Vec& vec, int start, int stop, int step)
{
    if (step == 0) throw std::invalid_argument("slice step cannot be zero");
    if (step < 0) throw std::invalid_argument("step sizes below 0 lead to an invalid order of editops");

    const int len = static_cast<int>(vec.size());

    if (start < 0)
        start = std::max(start + len, 0);
    else if (start > len)
        start = len;

    if (stop < 0)
        stop = std::max(stop + len, 0);
    else if (stop > len)
        stop = len;

    if (start >= stop) return;

    /* compact in place: `iter` is the write position, `i` the read position */
    auto iter = vec.begin() + start;
    for (int i = start; i < len; i++)
        if (i >= stop || ((i - start) % step != 0)) *(iter++) = vec[static_cast<size_t>(i)];

    vec.resize(static_cast<size_t>(std::distance(vec.begin(), iter)));
    vec.shrink_to_fit();
}
} // namespace detail
class Opcodes;
class Editops : private std::vector {
public:
using std::vector::size_type;
Editops() noexcept : src_len(0), dest_len(0)
{}
Editops(size_type count, const EditOp& value) : std::vector(count, value), src_len(0), dest_len(0)
{}
explicit Editops(size_type count) : std::vector(count), src_len(0), dest_len(0)
{}
Editops(const Editops& other)
: std::vector(other), src_len(other.src_len), dest_len(other.dest_len)
{}
Editops(const Opcodes& other);
Editops(Editops&& other) noexcept
{
swap(other);
}
Editops& operator=(Editops other) noexcept
{
swap(other);
return *this;
}
/* Element access */
using std::vector::at;
using std::vector::operator[];
using std::vector::front;
using std::vector::back;
using std::vector::data;
/* Iterators */
using std::vector::begin;
using std::vector::cbegin;
using std::vector::end;
using std::vector::cend;
using std::vector::rbegin;
using std::vector::crbegin;
using std::vector::rend;
using std::vector::crend;
/* Capacity */
using std::vector::empty;
using std::vector::size;
using std::vector::max_size;
using std::vector::reserve;
using std::vector::capacity;
using std::vector::shrink_to_fit;
/* Modifiers */
using std::vector::clear;
using std::vector::insert;
using std::vector::emplace;
using std::vector::erase;
using std::vector::push_back;
using std::vector::emplace_back;
using std::vector::pop_back;
using std::vector::resize;
void swap(Editops& rhs) noexcept
{
std::swap(src_len, rhs.src_len);
std::swap(dest_len, rhs.dest_len);
std::vector::swap(rhs);
}
Editops slice(int start, int stop, int step = 1) const
{
Editops ed_slice = detail::vector_slice(*this, start, stop, step);
ed_slice.src_len = src_len;
ed_slice.dest_len = dest_len;
return ed_slice;
}
void remove_slice(int start, int stop, int step = 1)
{
detail::vector_remove_slice(*this, start, stop, step);
}
Editops reverse() const
{
Editops reversed = *this;
std::reverse(reversed.begin(), reversed.end());
return reversed;
}
size_t get_src_len() const noexcept
{
return src_len;
}
void set_src_len(size_t len) noexcept
{
src_len = len;
}
size_t get_dest_len() const noexcept
{
return dest_len;
}
void set_dest_len(size_t len) noexcept
{
dest_len = len;
}
Editops inverse() const
{
Editops inv_ops = *this;
std::swap(inv_ops.src_len, inv_ops.dest_len);
for (auto& op : inv_ops) {
std::swap(op.src_pos, op.dest_pos);
if (op.type == EditType::Delete)
op.type = EditType::Insert;
else if (op.type == EditType::Insert)
op.type = EditType::Delete;
}
return inv_ops;
}
Editops remove_subsequence(const Editops& subsequence) const
{
Editops result;
result.set_src_len(src_len);
result.set_dest_len(dest_len);
if (subsequence.size() > size()) throw std::invalid_argument("subsequence is not a subsequence");
result.resize(size() - subsequence.size());
/* offset to correct removed edit operations */
int offset = 0;
auto op_iter = begin();
auto op_end = end();
size_t result_pos = 0;
for (const auto& sop : subsequence) {
for (; op_iter != op_end && sop != *op_iter; op_iter++) {
result[result_pos] = *op_iter;
result[result_pos].src_pos =
static_cast(static_cast(result[result_pos].src_pos) + offset);
result_pos++;
}
/* element of subsequence not part of the sequence */
if (op_iter == op_end) throw std::invalid_argument("subsequence is not a subsequence");
if (sop.type == EditType::Insert)
offset++;
else if (sop.type == EditType::Delete)
offset--;
op_iter++;
}
/* add remaining elements */
for (; op_iter != op_end; op_iter++) {
result[result_pos] = *op_iter;
result[result_pos].src_pos =
static_cast(static_cast(result[result_pos].src_pos) + offset);
result_pos++;
}
return result;
}
private:
size_t src_len;
size_t dest_len;
};
/** Two Editops are equal when both lengths, the number of operations and every operation match. */
inline bool operator==(const Editops& lhs, const Editops& rhs)
{
    const bool same_lengths =
        lhs.get_src_len() == rhs.get_src_len() && lhs.get_dest_len() == rhs.get_dest_len();
    if (!same_lengths || lhs.size() != rhs.size()) return false;

    return std::equal(lhs.begin(), lhs.end(), rhs.begin());
}
/** Negation of operator== for Editops. */
inline bool operator!=(const Editops& lhs, const Editops& rhs)
{
    const bool equal = (lhs == rhs);
    return !equal;
}
/** Non-member swap for Editops; exchanges the contents of both arguments via the member swap. */
inline void swap(Editops& lhs, Editops& rhs) noexcept(noexcept(lhs.swap(rhs)))
{
    rhs.swap(lhs);
}
class Opcodes : private std::vector {
public:
using std::vector::size_type;
Opcodes() noexcept : src_len(0), dest_len(0)
{}
Opcodes(size_type count, const Opcode& value) : std::vector(count, value), src_len(0), dest_len(0)
{}
explicit Opcodes(size_type count) : std::vector(count), src_len(0), dest_len(0)
{}
Opcodes(const Opcodes& other)
: std::vector(other), src_len(other.src_len), dest_len(other.dest_len)
{}
Opcodes(const Editops& other);
Opcodes(Opcodes&& other) noexcept
{
swap(other);
}
Opcodes& operator=(Opcodes other) noexcept
{
swap(other);
return *this;
}
/* Element access */
using std::vector::at;
using std::vector::operator[];
using std::vector::front;
using std::vector::back;
using std::vector::data;
/* Iterators */
using std::vector::begin;
using std::vector