pax_global_header 0000666 0000000 0000000 00000000064 14371523677 0014531 g ustar 00root root 0000000 0000000 52 comment=d1116831a3514fff39321a73b8b526f17f7a2c18
WFA2-lib-2.3.3/ 0000775 0000000 0000000 00000000000 14371523677 0013001 5 ustar 00root root 0000000 0000000 WFA2-lib-2.3.3/.gitignore 0000664 0000000 0000000 00000000604 14371523677 0014771 0 ustar 00root root 0000000 0000000 lib/
bin/
build/
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# Test output files
tests/wfa.utest.log.correct
tests/wfa.utest.log.mem
tests/wfa.utest.log.time
WFA2-lib-2.3.3/CMakeLists.txt 0000664 0000000 0000000 00000013551 14371523677 0015546 0 ustar 00root root 0000000 0000000 # For Debian currently with
#
# cd build
# cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
# make
# make test
# make install
# See below option statements and the README for build information
# Oldest CMake release this build description supports.
cmake_minimum_required(VERSION 3.16)

# No LANGUAGES are listed, so CMake enables its defaults (C and CXX).
project(wfa2lib)

# Default C++ standard for the C++ targets created further down.
set(CMAKE_CXX_STANDARD 17)

# Helper modules: GNU-style install directories and package/feature reporting.
include(GNUInstallDirs)
include(FeatureSummary)

find_package(PkgConfig REQUIRED)

# Abort the configure step if a package recorded as REQUIRED up to this point
# could not be found.
feature_summary(
  WHAT REQUIRED_PACKAGES_NOT_FOUND
  FATAL_ON_MISSING_REQUIRED_PACKAGES
)
# ---- Options
# OPENMP      - build with OpenMP; adds -DWFA_PARALLEL to the compile flags.
# PROFILING   - add -g to the compiler flags.
# GPROF       - add -pg (gprof instrumentation) to the compiler flags.
# ASAN        - build with AddressSanitizer.
# EXTRA_FLAGS - free-form string of additional compiler flags, e.g.
#               -DEXTRA_FLAGS="-ftree-vectorize -msse2".
option(OPENMP "Enable OpenMP" OFF) # enables WFA_PARALLEL
option(PROFILING "Enable profiling" OFF)
option(GPROF "Enable gprof instrumentation (-pg)" OFF)
option(ASAN "Use address sanitiser" OFF)
# EXTRA_FLAGS holds a flag string, not a boolean, so it is a STRING cache entry
# rather than an option(). An empty value means "no extra flags".
set(EXTRA_FLAGS "" CACHE STRING "Additional optimization flags for the C/C++ compiler")
# include(CheckIPOSupported) # adds lto
# check_ipo_supported(RESULT ipo_supported OUTPUT output)
# ---- Dependencies
if(OPENMP)
  # Go through find_package() instead of including the Find module directly:
  # REQUIRED turns a missing OpenMP into a clear configure-time error instead
  # of a build that defines WFA_PARALLEL without any OpenMP compiler flags.
  # OpenMP_C_FLAGS / OpenMP_CXX_FLAGS are still provided for use below.
  find_package(OpenMP REQUIRED)
  set(OPTIMIZE_FLAGS "-DWFA_PARALLEL")
endif()

# Append user-supplied flags (-DEXTRA_FLAGS="...") to the optimisation flags.
if(EXTRA_FLAGS)
  set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} ${EXTRA_FLAGS}")
endif()

# REQUIRED is passed directly: the TYPE REQUIRED property set below is only
# acted upon by feature_summary(), which has already run before this point.
find_package(Threads REQUIRED)
set_package_properties(Threads PROPERTIES TYPE REQUIRED)
# ---- Build switches
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ${ipo_supported})

# Fall back to a Release build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Choose the type of build, options are: Release|Debug|RelWithDebInfo (for distros)." FORCE)
endif()

# Host-specific tuning is applied to Release builds only; RelWithDebInfo
# (used by distributions) stays portable.
if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
  set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} -march=native -D_FILE_OFFSET_BITS=64")
endif()

if(("${CMAKE_BUILD_TYPE}" MATCHES "Release") OR ("${CMAKE_BUILD_TYPE}" MATCHES "RelWithDebInfo"))
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} ${OPTIMIZE_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} ${OPTIMIZE_FLAGS}")
endif()

if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
  # The OpenMP flags must accompany -DWFA_PARALLEL (carried in OPTIMIZE_FLAGS)
  # in Debug builds too; they expand to nothing when OPENMP is off.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} ${OPTIMIZE_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} ${OPTIMIZE_FLAGS}")
  # -Wfatal-errors is a compiler option, not a preprocessor definition.
  add_compile_options(-Wfatal-errors)
endif()

if(ASAN)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fno-common")
endif()

# The core library is C and only the bindings are C++, so the profiling flags
# are applied to both languages.
if(PROFILING)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
endif()

if(GPROF)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pg")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
endif()
# ---- Include files
# Header lists, one variable per component directory. Each pattern is anchored
# at the current source directory, which is also where file(GLOB) resolves
# relative patterns.
file(GLOB INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/wavefront/*.h*")
file(GLOB UTILS_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/utils/*.h*")
file(GLOB ALIGNMENT_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/alignment/*.h*")
file(GLOB SYSTEM_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/system/*.h*")
# C sources of the core library, grouped by component directory.
set(wfa2lib_SOURCE
  # wavefront/
  wavefront/wavefront_align.c
  wavefront/wavefront_aligner.c
  wavefront/wavefront_attributes.c
  wavefront/wavefront_backtrace_buffer.c
  wavefront/wavefront_backtrace.c
  wavefront/wavefront_backtrace_offload.c
  wavefront/wavefront_bialign.c
  wavefront/wavefront_bialigner.c
  wavefront/wavefront.c
  wavefront/wavefront_components.c
  wavefront/wavefront_compute_affine2p.c
  wavefront/wavefront_compute_affine.c
  wavefront/wavefront_compute.c
  wavefront/wavefront_compute_edit.c
  wavefront/wavefront_compute_linear.c
  wavefront/wavefront_debug.c
  wavefront/wavefront_display.c
  wavefront/wavefront_extend.c
  wavefront/wavefront_heuristic.c
  wavefront/wavefront_pcigar.c
  wavefront/wavefront_penalties.c
  wavefront/wavefront_plot.c
  wavefront/wavefront_slab.c
  wavefront/wavefront_unialign.c
  # system/
  system/mm_stack.c
  system/mm_allocator.c
  system/profiler_counter.c
  system/profiler_timer.c
  # utils/
  utils/bitmap.c
  utils/dna_text.c
  utils/sequence_buffer.c
  utils/vector.c
  utils/commons.c
  utils/heatmap.c
  utils/string_padded.c
  # alignment/
  alignment/affine2p_penalties.c
  alignment/affine_penalties.c
  alignment/cigar.c
  alignment/score_matrix.c
)
# Core C library, built twice from the same sources: a static archive and a
# shared library. STATIC is spelled out so that -DBUILD_SHARED_LIBS=ON cannot
# turn wfa2_static into a second shared library that shares its output name
# with the wfa2 target.
add_library(wfa2_static STATIC ${wfa2lib_SOURCE})
add_library(wfa2 SHARED ${wfa2lib_SOURCE})

# Both flavours are emitted under the base name "wfa2".
set_target_properties(wfa2_static PROPERTIES OUTPUT_NAME wfa2)
set_target_properties(wfa2 PROPERTIES SOVERSION 0)

# Include paths needed to build the library and propagated to its consumers.
target_include_directories(wfa2 PUBLIC . wavefront utils)
target_include_directories(wfa2_static PUBLIC . wavefront utils)

# Namespaced aliases for consumers.
add_library(wfa2::wfa2 ALIAS wfa2)
add_library(wfa2::wfa2_static ALIAS wfa2_static)
# ---- C++ binding library
set(wfa2cpp_SOURCE
  bindings/cpp/WFAligner.cpp
)

# Headers of the C++ bindings.
file(GLOB CPP_INCLUDES
  bindings/cpp/*.h*
)

# Static and shared flavours of the bindings, both emitted as "wfa2cpp".
add_library(wfa2cpp_static STATIC ${wfa2cpp_SOURCE})
add_library(wfa2cpp SHARED ${wfa2cpp_SOURCE})
set_target_properties(wfa2cpp PROPERTIES SOVERSION 0)
set_target_properties(wfa2cpp_static PROPERTIES OUTPUT_NAME wfa2cpp)

# Each flavour links the matching flavour of the core library, so a consumer
# of the static binding does not pick up a dependency on the shared libwfa2.
target_link_libraries(wfa2cpp PUBLIC wfa2)
target_link_libraries(wfa2cpp_static PUBLIC wfa2_static)

# Namespaced aliases for consumers.
add_library(wfa2::wfa2cpp ALIAS wfa2cpp)
add_library(wfa2::wfa2cpp_static ALIAS wfa2cpp_static)
# ---- Get version
# Read the VERSION file from the source directory and pass its content to the
# compiler as the string macros WFA2LIB_VERSION and VERSION.
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" BUILD_NUMBER)
add_definitions(
  -DWFA2LIB_VERSION="${BUILD_NUMBER}"
  -DVERSION="${BUILD_NUMBER}"
)

# No additional link libraries at present.
set(wfa2lib_LIBS)
# add_dependencies(wfa2lib ${wfa2lib_DEPS})
# ---- Build all
# ---- Test
enable_testing()

# add_wfa_test()
#   Registers the CTest entry "wfa2lib". It runs ./tests/wfa.utest.sh with the
#   current source directory as working directory. Takes no arguments.
function(add_wfa_test)
  add_test(
    NAME wfa2lib
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    COMMAND ./tests/wfa.utest.sh
  )
endfunction()

add_wfa_test()
# ---- Install
# Do not install anything when used with FetchContent
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
  # Name a destination for every artifact kind. ARCHIVE covers static
  # libraries (and import libraries), LIBRARY covers shared objects and
  # RUNTIME covers DLLs. Naming ARCHIVE alone leaves the shared libraries to
  # CMake's implicit default destinations.
  install(TARGETS wfa2_static wfa2 wfa2cpp wfa2cpp_static
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  )

  # Headers keep their component sub-directory below <includedir>/wfa2lib.
  install(FILES ${INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/wavefront)
  install(FILES ${UTILS_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/utils)
  install(FILES ${ALIGNMENT_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/alignment)
  install(FILES ${SYSTEM_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/system)
  install(FILES ${CPP_INCLUDES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/wfa2lib/bindings/cpp)
endif()
WFA2-lib-2.3.3/LICENSE 0000664 0000000 0000000 00000002163 14371523677 0014010 0 ustar 00root root 0000000 0000000 MIT License
Copyright (c) 2017 Santiago Marco-Sola
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
AUTHOR/CONTACT: Santiago Marco-Sola
WFA2-lib-2.3.3/Makefile 0000664 0000000 0000000 00000004726 14371523677 0014452 0 ustar 00root root 0000000 0000000 ###############################################################################
# Flags & Folders
###############################################################################
# Output and test folders, relative to the directory holding this Makefile.
FOLDER_BIN       = bin
FOLDER_BUILD     = build
FOLDER_BUILD_CPP = build/cpp
FOLDER_LIB       = lib
FOLDER_TESTS     = tests

# Name of the host system as reported by uname.
UNAME = $(shell uname)

# Toolchain: C and C++ compilers are taken from $(CC) and $(CXX).
CC  := $(CC)
CPP := $(CXX)
CC_FLAGS = -Wall -g
AR       = ar
AR_FLAGS = -rsc

# Build switches; each one defaults to 1 unless it is already defined.
ifndef BUILD_EXAMPLES
  BUILD_EXAMPLES = 1
endif
ifndef BUILD_TOOLS
  BUILD_TOOLS = 1
endif
ifndef BUILD_WFA_PARALLEL
  BUILD_WFA_PARALLEL = 1
endif
###############################################################################
# Configuration rules
###############################################################################
# Static archives produced by the lib_wfa target.
LIB_WFA     = $(FOLDER_LIB)/libwfa.a
LIB_WFA_CPP = $(FOLDER_LIB)/libwfacpp.a

# Source directories that are built through a recursive make.
SUBDIRS = alignment bindings/cpp system utils wavefront

# Optional application directories, selected by the build switches.
ifeq ($(BUILD_TOOLS),1)
  APPS += tools/generate_dataset tools/align_benchmark
endif
ifeq ($(BUILD_EXAMPLES),1)
  APPS += examples
endif
# Optimised build; "all" is the first target and therefore the default goal.
all: CC_FLAGS += -O3 -march=native #-flto -ffat-lto-objects
all: build

# Debug build: only the base CC_FLAGS are used.
debug: build

# Sanitizer build.
# ASAN: ASAN_OPTIONS=detect_leaks=1:symbolize=1 LSAN_OPTIONS=verbosity=2:log_threads=1
ASAN_OPT = -fsanitize=address -fsanitize=undefined -fsanitize=shift -fsanitize=alignment \
           -fsanitize=signed-integer-overflow -fsanitize=bool -fsanitize=enum \
           -fsanitize=pointer-compare -fsanitize=pointer-overflow -fsanitize=builtin
asan: CC_FLAGS += $(ASAN_OPT) -fno-omit-frame-pointer -fno-common
asan: build
###############################################################################
# Build rules
###############################################################################
# Full build: output folders, per-directory objects, archives, then the apps.
# The order between these steps is stated on the individual rules below so
# that it also holds for parallel builds (make -j).
build: setup $(SUBDIRS) lib_wfa $(APPS)

# Create the output folders.
setup:
	@mkdir -p $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_BUILD_CPP) $(FOLDER_LIB)

# Pack the objects into the C archive and the C + C++ archive.
lib_wfa: $(SUBDIRS)
	$(AR) $(AR_FLAGS) $(LIB_WFA) $(FOLDER_BUILD)/*.o 2> /dev/null
	$(AR) $(AR_FLAGS) $(LIB_WFA_CPP) $(FOLDER_BUILD)/*.o $(FOLDER_BUILD_CPP)/*.o 2> /dev/null

# Remove build products and test output.
clean:
	rm -rf $(FOLDER_BIN) $(FOLDER_BUILD) $(FOLDER_LIB) 2> /dev/null
	$(MAKE) --directory=tools/align_benchmark clean
	$(MAKE) --directory=examples clean
	rm -rf $(FOLDER_TESTS)/*.alg $(FOLDER_TESTS)/*.log* 2> /dev/null

###############################################################################
# Subdir rule
###############################################################################
# Export all variables to the recursive make invocations.
export

# The output folders must exist before any sub-directory is built.
$(SUBDIRS): setup
	$(MAKE) --directory=$@ all

# Applications are built only after the archives have been created
# (presumably they link against them).
$(APPS): lib_wfa
	$(MAKE) --directory=$@ all

# None of these names is a file produced by its rule; "build" in particular is
# also the name of the object folder created by setup.
.PHONY: all debug asan build setup lib_wfa clean $(SUBDIRS) $(APPS)
WFA2-lib-2.3.3/README.md 0000664 0000000 0000000 00000104412 14371523677 0014262 0 ustar 00root root 0000000 0000000 # WFA2-lib
## 1. INTRODUCTION
### 1.1 What is WFA?
The wavefront alignment (WFA) algorithm is an **exact** gap-affine algorithm that takes advantage of homologous regions between the sequences to accelerate the alignment process. Unlike traditional dynamic programming algorithms that run in quadratic time, the WFA runs in time `O(ns+s^2)`, proportional to the sequence length `n` and the alignment score `s`, using `O(s^2)` memory (or `O(s)` using the ultralow/BiWFA mode). Moreover, the WFA algorithm exhibits simple computational patterns that modern compilers can automatically vectorize for different architectures without adapting the code. To intuitively illustrate why the WFA algorithm is so interesting, look at the following figure. The left panel shows the cells computed by a classical dynamic programming based algorithm (like Smith-Waterman or Needleman-Wunsch). In contrast, the right panel shows the cells computed by the WFA algorithm to obtain the same result (i.e., the optimal alignment).
### 1.2 What is WFA2-lib?
The WFA2 library implements the WFA algorithm for different distance metrics and alignment modes. It supports various [distance functions](#wfa2.distances): indel, edit, gap-linear, gap-affine, and dual-cost gap-affine distances. The library allows computing only the score or the complete alignment (i.e., CIGAR) (see [Alignment Scope](#wfa2.scope)). Also, the WFA2 library supports computing end-to-end alignments (a.k.a. global-alignment) and ends-free alignments (including semi-global, glocal, and extension alignment) (see [Alignment Span](#wfa2.span)). In the case of long and noisy alignments, the library provides different [low-memory modes](#wfa2.mem) that significantly reduce the memory usage of the naive WFA algorithm implementation. Beyond the exact-alignment modes, the WFA2 library implements [heuristic modes](#wfa2.heuristics) that dramatically accelerate the alignment computation. Additionally, the library provides many other support functions to display and verify alignment results, control the overall memory usage, and more.
### 1.3 Getting started
Git clone and compile the library, tools, and examples. By default use cmake:
```
git clone https://github.com/smarco/WFA2-lib
cd WFA2-lib
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . --verbose
ctest . --verbose
```
There are some flags that can be used:
```
cmake .. -DOPENMP=TRUE
```
To add vector optimization try
```
cmake .. -DCMAKE_BUILD_TYPE=Release -DEXTRA_FLAGS="-ftree-vectorize -msse2 -mfpmath=sse -ftree-vectorizer-verbose=5"
```
To build a shared library (static is the default)
```
cmake .. -DBUILD_SHARED_LIBS=ON
```
It is possible to build WFA2-lib in a GNU Guix container, for more information see [guix.scm](./guix.scm).
### 1.4 Contents (where to go from here)
Section [WFA2-lib features](#wfa2.features) explores the most relevant options and features of the library. Then, the folder [tools/](tools/README.md) contains tools that can be used to execute and understand the WFA2 library capabilities. Additionally, the folder [examples/](examples/README.md) contains simple examples illustrating how to integrate the WFA2 code into any tool.
* [Using WFA2-lib in your project](#wfa2.programming)
* [Simple C example](#wfa2.programming.c)
* [Simple C++ example](#wfa2.programming.cpp)
* [WFA2-lib Features](#wfa2.features)
* [Distance Metrics](#wfa2.distances)
* [Alignment Scope](#wfa2.scope)
* [Alignment Span](#wfa2.span)
* [Memory modes](#wfa2.mem)
* [Heuristic modes](#wfa2.heuristics)
* [Technical notes](#wfa2.other.notes)
* [Reporting Bugs and Feature Request](#wfa2.complains)
* [License](#wfa2.licence)
* [Citation](#wfa2.cite)
### 1.5 Important notes and clarifications
- The WFA algorithm is an **exact algorithm**. If no heuristic is applied (e.g., band or adaptive pruning), the core algorithm guarantees to always find the optimal solution (i.e., best alignment score). Since its first release, some authors have referenced the WFA as approximated or heuristic, which is NOT the case.
- Given two sequences of length `n`, traditional dynamic-programming (DP) based methods (like Smith-Waterman or Needleman-Wunsch) compute the optimal alignment in `O(n^2)` time, using `O(n^2)` memory. In contrast, the WFA algorithm requires `O(ns+s^2)` time and `O(s^2)` memory (being `s` the optimal alignment score). Therefore, **the memory consumption of the WFA algorithm is not intrinsically higher than that of other methods**. Most DP-based methods can use heuristics (like banded, X-drop, or Z-drop) to reduce the execution time and the memory usage at the expense of losing accuracy. Likewise, **the WFA algorithm can also use heuristics to reduce the execution time and memory usage**. Moreover, the memory mode `ultralow` uses the BiWFA algorithm to execute in `O(ns+s^2)` time and linear `O(s)` memory.
- **A note for the fierce competitors.** I can understand that science and publishing have become a fierce competition these days. Many researchers want their methods to be successful and popular, seeking funding, tenure, or even fame. If you are going to benchmark the WFA using the least favourable configuration, careless programming, and a disadvantageous setup, please, go ahead. But remember, researchers like you have put a lot of effort into developing the WFA. We all joined this "competition" because we sought to find better methods that could truly help other researchers. So, try to be nice, tone down the marketing, and produce fair evaluations and honest publications.
## 2. USING WFA2-LIB IN YOUR PROJECT
### 2.1 Simple C example
This simple example illustrates how to align two sequences using the WFA2 library. First, include the WFA2 alignment headers.
```C
#include "wavefront/wavefront_align.h"
```
Next, create and configure the WFA alignment object. The following example uses the default configuration and sets custom `gap_affine` penalties. Note that mismatch, gap-opening, and gap-extension must be positive values.
```C
// Configure alignment attributes
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = gap_affine;
attributes.affine_penalties.mismatch = 4;
attributes.affine_penalties.gap_opening = 6;
attributes.affine_penalties.gap_extension = 2;
// Initialize Wavefront Aligner
wavefront_aligner_t* const wf_aligner = wavefront_aligner_new(&attributes);
```
Finally, call the `wavefront_align` function.
```C
char* pattern = "TCTTTACTCGCGCGTTGGAGAAATACAATAGT";
char* text = "TCTATACTGCGCGTTTGGAGAAATAAAATAGT";
wavefront_align(wf_aligner,pattern,strlen(pattern),text,strlen(text)); // Align
```
Afterwards, we can use the library to display the alignment result (e.g., the alignment score and CIGAR).
```C
// Display CIGAR & score
cigar_print_pretty(stderr,pattern,strlen(pattern),text,strlen(text),
&wf_aligner->cigar,wf_aligner->mm_allocator);
fprintf(stderr,"Alignment Score %d\n",wf_aligner->cigar.score);
```
At the end of the program, it is polite to release the memory used.
```C
wavefront_aligner_delete(wf_aligner); // Free
```
To compile and run this example, you need to link against the WFA library (-lwfa).
```
$> gcc -O3 wfa_example.c -o wfa_example -lwfa
$> ./wfa_example
```
**IMPORTANT.** Once an alignment object is created, **it is strongly recommended to reuse it to compute multiple alignments**. Creating and destroying the alignment object for every alignment computed can have a significant overhead. Reusing the alignment object allows repurposing internal data structures, minimising the cost of memory allocations, and avoiding multiple alignment setups and precomputations.
### 2.2 Simple C++ example
The WFA2 library can be used from C++ code using the C++ bindings. This example is similar to the previous one but uses C++ bindings. First, include the C++ bindings and remember to use the WFA namespace.
```C++
#include "bindings/cpp/WFAligner.hpp"
using namespace wfa;
```
Configure and create the WFA alignment object. In this case, gap-affine distance using custom penalties and the standard memory-usage algorithm (i.e., standard WFA algorithm).
```C++
// Create a WFAligner
WFAlignerGapAffine aligner(4,6,2,WFAligner::Alignment,WFAligner::MemoryHigh);
```
Align two sequences (in this case, given as strings).
```C++
string pattern = "TCTTTACTCGCGCGTTGGAGAAATACAATAGT";
string text = "TCTATACTGCGCGTTTGGAGAAATAAAATAGT";
aligner.alignEnd2End(pattern,text); // Align
```
Display the result of the alignment.
```C++
// Display CIGAR & score
string cigar = aligner.getAlignmentCigar();
cout << "CIGAR: " << cigar << endl;
cout << "Alignment score " << aligner.getAlignmentScore() << endl;
```
**IMPORTANT.** Once an alignment object is created, **it is strongly recommended to reuse it to compute multiple alignments**. Creating and destroying the alignment object for every alignment computed can have a significant overhead. Reusing the alignment object allows repurposing internal data structures, minimising the cost of memory allocations, and avoiding multiple alignment setups and precomputations.
### 2.3 Rust bindings
Rust bindings can be generated automatically using `bindgen`, see [bindings/rust/build.rs](bindings/rust/build.rs).
An example of how to use them is [here](./bindings/rust/example.rs).
## 3. WFA2-LIB FEATURES
* **Exact alignment** method that computes the optimal **alignment score** and/or **alignment CIGAR**.
* Supports **multiple distance metrics** (i.e., indel, edit, gap-linear, gap-affine, and dual-cost gap-affine).
* Allows performing **end-to-end** (a.k.a. global) and **ends-free** (e.g., semi-global, extension, overlap) alignment.
* Implements **low-memory modes** to reduce and control memory consumption (down to `O(s)` using the `ultralow` mode).
* Supports various **heuristic strategies** to use on top of the core WFA algorithm.
* WFA2-lib **operates with plain ASCII strings**. Although we mainly focus on aligning DNA/RNA sequences, the WFA algorithm and the WFA2-lib implementation work with any pair of strings. Moreover, these sequences do not have to be pre-processed (e.g., packed or profiled), nor any table must be precomputed (like the query profile, used within some Smith-Waterman implementations).
* Due to its simplicity, the WFA algorithm can be automatically vectorized for any SIMD-compliant CPU supported by the compiler. For this reason, **the WFA2-lib implementation is independent of any specific ISA or processor model**. Unlike other hardware-dependent libraries, we aim to offer a multiplatform pairwise-alignment library that can be executed on different processors and models (e.g., SSE, AVX2, AVX512, POWER-ISA, ARM, NEON, SVE, SVE2, RISCV-RVV, ...).
### 3.1 Distance Metrics
The WFA2 library implements the wavefront algorithm for the most widely used distance metrics. The practical alignment time can change depending on the distance function, although the computational complexity always remains proportional to the alignment score or distance. The WFA2 library offers the following distance metrics or functions:
- **Indel (or LCS).** Produces alignments allowing matches, insertions, and deletions with unitary cost (i.e., {M,I,D} = {0,1,1}) but not mismatches. Also known as the longest common subsequence (LCS) problem. The LCS is defined as the longest subsequence common to both sequences, provided that the characters of the subsequence are not required to occupy consecutive positions within the original sequences.
```
PATTERN A-GCTA-GTGTC--AATGGCTACT-T-T-TCAGGTCCT
| ||| ||||| |||||||| | | |||||||||
TEXT AA-CTAAGTGTCGG--TGGCTACTATATATCAGGTCCT
ALIGNMENT 1M1I1D3M1I5M2I2D8M1I1M1I1M1I9M
```
```C
// Configuration
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = indel;
```
- **Edit (a.k.a. Levenshtein).** Produces alignments allowing matches, mismatches, insertions, and deletions with unitary cost (i.e., {M,X,I,D} = {0,1,1,1}). Edit or Levenshtein distance between two sequences is the minimum number of single-character edits (i.e., insertions, deletions, or mismatches) required to transform one sequence into the other.
```
PATTERN AGCTA-GTGTCAATGGCTACT-T-T-TCAGGTCCT
| ||| ||||| |||||||| | | |||||||||
TEXT AACTAAGTGTCGGTGGCTACTATATATCAGGTCCT
ALIGNMENT 1M1X3M1I5M2X8M1I1M1I1M1I9M
```
```C
// Configuration
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = edit;
```
- **Gap-linear (as in Needleman-Wunsch).** Produces alignments allowing matches, mismatches, insertions, and deletions. Allows assigning a penalty (a.k.a. cost or weight) to each alignment operation. It computes the optimal alignment, minimizing the overall cost to transform one sequence into the other. Under the gap-linear model, the alignment score is computed based on {X,I}, where X corresponds to the mismatch penalty and the gap penalty is expressed as the function l(N)=N·I (given the length of the gap N and the gap penalty I).
```
PATTERN A-GCTA-GTGTC--AATGGCTACT-T-T-TCAGGTCCT
| ||| ||||| |||||||| | | |||||||||
TEXT AA-CTAAGTGTCGG--TGGCTACTATATATCAGGTCCT
ALIGNMENT 1M1I1D3M1I5M2I2D8M1I1M1I1M1I9M
```
```C
// Configuration
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = gap_linear;
attributes.linear_penalties.mismatch = 6; // X > 0
attributes.linear_penalties.indel = 2; // I > 0
```
- **Gap-affine (as in Smith-Waterman-Gotoh).** Linear gap cost functions can lead to alignments populated with small gaps. Long gaps are preferred in certain scenarios, like genomics or evolutionary studies (understood as a single event). Under the gap-affine model, the alignment score is computed based on {X,O,E}, where X corresponds to the mismatch penalty and the gap penalty is expressed as the function g(N)=O+N·E (given the length of the gap N, the gap opening penalty O, and the gap extension penalty E).
```
PATTERN AGCTA-GTGTCAATGGCTACT---TTTCAGGTCCT
| ||| ||||| |||||||| | |||||||||
TEXT AACTAAGTGTCGGTGGCTACTATATATCAGGTCCT
ALIGNMENT 1M1X3M1I5M2X8M3I1M1X9M
```
```C
// Configuration
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = gap_affine;
attributes.affine_penalties.mismatch = 6; // X > 0
attributes.affine_penalties.gap_opening = 4; // O >= 0
attributes.affine_penalties.gap_extension = 2; // E > 0
```
- **Dual-cost gap-affine distances.** Also known as piece-wise gap-affine cost, this distance metric addresses some issues that the regular gap-affine distance has with long gaps. In a nutshell, the regular gap-affine distance can occasionally split long gaps by sporadic mismatches (often when aligning long and noisy sequences). Instead, many users would prefer to increase the open gap cost to produce a single long gap. For that, the dual-cost gap-affine distance (p=2) defines two affine cost functions (i.e., for short and long gaps). Then, the alignment score is computed based on {X,O1,E1,O2,E2}, where X corresponds to the mismatch penalty and the gap penalty is expressed as the function g(N)=min{O1+N·E1,O2+N·E2} (given the length of the gap N, the gap opening penalties O1 and O2, and the gap extension penalties E1 and E2).
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.distance_metric = gap_affine_2p;
attributes.affine2p_penalties.mismatch = 6; // X > 0
attributes.affine2p_penalties.gap_opening1 = 4; // O1 >= 0
attributes.affine2p_penalties.gap_extension1 = 2; // E1 > 0
attributes.affine2p_penalties.gap_opening2 = 12; // O2 >= 0
attributes.affine2p_penalties.gap_extension2 = 1; // E2 > 0
```
### 3.2 Alignment Scope
Depending on the use case, an application is often only required to compute the alignment score, not the complete alignment (i.e., CIGAR). As happens with traditional dynamic programming algorithms, the WFA algorithm requires less memory (i.e., `O(s)`) to compute the alignment score. In turn, this results in slightly faster alignment executions. For this reason, the WFA2 library implements two different modes depending on the alignment scope: score-only and full-CIGAR alignment.
The **score-only alignment** mode computes only the alignment score. This mode utilizes only the front-wavefronts of the WFA algorithm to keep track of the optimal alignment score. As a result, it requires `O(s)` memory and, in practice, performs slightly faster than the standard full-CIGAR mode.
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_scope = compute_score;
```
The **full-CIGAR alignment** computes the sequence of alignment operations (i.e., {'M','X','D','I'}) that transforms one sequence into the other (i.e., alignment CIGAR). The alignment score can be obtained as a by-product of the alignment process, evaluating the score of the alignment CIGAR. This mode requires `O(s^2)` memory (using the default memory mode, wavefront_memory_high) or less (using the low-memory modes).
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_scope = compute_alignment;
```
### 3.3 Alignment Span
The WFA2 library allows computing alignments with different spans or shapes. Although there is certain ambiguity and confusion in the terminology, we have tried to generalize the different options available to offer flexible parameters that can capture multiple alignment scenarios. During the development of the WFA we decided to adhere to the classical approximate string matching terminology where we align a **pattern (a.k.a. query or sequence)** against a **text (a.k.a. target, database, or reference)**.
- **End-to-end alignment.** Also known as global alignment, this alignment mode forces aligning the two sequences from the beginning to end of both.
```
PATTERN AATTAATTTAAGTCTAGGCTACTTTCGGTACTTTGTTCTT
|||| |||||||||||||||||||||||||| |||
TEXT AATT----TAAGTCTAGGCTACTTTCGGTACTTT---CTT
```
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_end2end;
```
- **Ends-free alignment.** This alignment mode allows leading and trailing insertions or deletions for "free" (i.e., no penalty/cost on the overall alignment score). Moreover, this alignment mode allows determining the maximum gap length allowed for free at the beginning and end of the sequences. Note that this mode does not implement local alignment as it does not allow free insertions and deletions at the beginning/end of the sequences at the same time. However, it allows many different configurations used across different analyses, methods, and tools.
```
PATTERN AATTAATTTAAGTCTAGGCTACTTTCGGTACTTTGTTCTT
|||||||||||||||||||||||||||||| ||
TEXT ----AATTTAAGTCTAGGCTACTTTCGGTACTTTCTT---
```
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = pattern_begin_free;
attributes.alignment_form.pattern_end_free = pattern_end_free;
attributes.alignment_form.text_begin_free = text_begin_free;
attributes.alignment_form.text_end_free = text_end_free;
```
- **Other**
Glocal alignment (a.k.a. semi-global or fitting)
- **Glocal alignment (a.k.a. semi-global or fitting).** Alignment mode where the pattern is globally aligned and the text is locally aligned. Often due to the large size of one of the sequences (e.g., the text sequence being a genomic reference), this alignment mode forces one sequence (i.e., pattern) to align globally to a substring of the other (i.e., text).
```
PATTERN -------------AATTTAAGTCTAGGCTACTTTC---------------
||||||||| ||||||||||||
TEXT ACGACTACTACGAAATTTAAGTATAGGCTACTTTCCGTACGTACGTACGT
```
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = 0;
attributes.alignment_form.pattern_end_free = 0;
attributes.alignment_form.text_begin_free = text_begin_free;
attributes.alignment_form.text_end_free = text_end_free;
```
Extension alignment
- **Extension alignment.** Alignment mode where the start of both pattern and text sequences are forced to be aligned. However, the ends of both are free. This alignment mode is typically used within seed-and-extend algorithms.
```C
// Right extension
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = 0;
attributes.alignment_form.pattern_end_free = pattern_end_free;
attributes.alignment_form.text_begin_free = 0;
attributes.alignment_form.text_end_free = text_end_free;
PATTERN AATTTAAGTCTG-CTACTTTCACGCA-GCT----------
||||| |||||| ||||||||||| | | |
TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT
```
```C
// Left extension
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = pattern_begin_free;
attributes.alignment_form.pattern_end_free = 0;
attributes.alignment_form.text_begin_free = text_begin_free;
attributes.alignment_form.text_end_free = 0;
PATTERN -------------AAACTTTCACGTACG-TGACAGTCTCT
||||||||||||| |||||| ||||
TEXT AATTTCAGTCTGGCTACTTTCACGTACGATGACAGACTCT
```
Overlapped alignment
- **Overlapped alignment (a.k.a. dovetail).**
```C
// Overlapped (Right-Left)
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = pattern_begin_free;
attributes.alignment_form.pattern_end_free = 0;
attributes.alignment_form.text_begin_free = 0;
attributes.alignment_form.text_end_free = text_end_free;
PATTERN ACGCGTCTGACTGACTGACTAAACTTTCATGTAC-TGACA-----------------
||||||||| |||| |||||
TEXT --------------------AAACTTTCACGTACGTGACATATAGCGATCGATGACT
```
```C
// Overlapped (Left-Right)
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.alignment_form.span = alignment_endsfree;
attributes.alignment_form.pattern_begin_free = 0;
attributes.alignment_form.pattern_end_free = pattern_end_free;
attributes.alignment_form.text_begin_free = text_begin_free;
attributes.alignment_form.text_end_free = 0;
PATTERN ----------------------ACGCGTCTGACTGACTACGACTACGACTGACTAGCAT
||||||||| || ||
TEXT ACATGCATCGATCAGACTGACTACGCGTCTG-CTAAC----------------------
```
### 3.4 Memory modes
The WFA2 library implements various memory modes: `wavefront_memory_high`, `wavefront_memory_med`, `wavefront_memory_low`, and `wavefront_memory_ultralow`. These modes allow regulating the overall memory consumption at the expense of execution time. The standard WFA algorithm, which explicitly stores all wavefronts in memory, corresponds to the mode `wavefront_memory_high`. The other modes progressively reduce memory usage at the expense of slightly longer alignment times. These memory modes can be used transparently with other alignment options and generate identical results. Note that this option does not affect the score-only alignment mode (it already uses a minimal memory footprint of `O(s)`).
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.memory_mode = wavefront_memory_med;
```
### 3.5 Heuristic modes
The WFA algorithm can be combined with many heuristics to reduce the alignment time and memory used. As with other alignment methods, heuristics can result in suboptimal solutions and loss of accuracy. Moreover, some heuristics may drop the alignment if the sequences exceed certain divergence thresholds (e.g., x-drop/z-drop). Due to the popularity and efficiency of these methods, the WFA2 library implements many of these heuristics. Note, **it is not about how little DP-matrix you compute, but about how good/accurate the resulting alignments are.**
WFA2's heuristics are classified into the following categories: ['wf-adaptive'](#wfa2.wfadaptive), ['drops'](#wfa2.drops), and ['bands'](#wfa2.bands). It is possible to combine a maximum of one heuristic from each category (OR-ing the strategy values or using the API). When multiple heuristics are used, they will be applied in cascade, starting with 'wf-adaptive', then 'drops', and finally 'bands'.
- **None (for comparison)**. If no heuristic is used, the WFA behaves exploring cells of the DP-matrix in increasing score order (increasing scores correspond to colours from blue to red).
Full-WFA |
 |
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_none;
```
- **Heuristic wf-adaptive.** This WFA heuristic removes outer diagonals that are extremely far behind compared to other ones in the same wavefront. Unlike other methods, the adaptive-wavefront reduction heuristic prunes based on the potential of the diagonal to lead to the optimal solution without previous knowledge of the error between the sequences.
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_wfadaptive;
attributes.heuristic.min_wavefront_length = 10;
attributes.heuristic.max_distance_threshold = 50;
attributes.heuristic.steps_between_cutoffs = 1;
```
**Graphical examples:**
Adaptive-WF(10,50) |
Adaptive-WF(10,50,10) |
 |
 |
- **Heuristic drops.** This heuristic compares the maximum score computed so far with the score of the last computed cells. Depending on the score difference, these heuristic strategies can reduce the size of the wavefront computed or even abandon the alignment process. In the case of zero-match alignment, $M=1$ will be assumed just for computation of the score drop. Also note that this heuristic is not compatible with distances 'edit' or 'indel'. In this category, WFA2 implements 'X-drop' and 'Z-drop'.
**X-drop** implements the classical X-drop heuristic. For each diagonal $k$, the X-drop heuristic compares the current score $sw_k$ with the maximum observed score so far $sw_{max}$. If the difference exceeds the $xdrop$ parameter (i.e., $sw_{max} - sw_k > xdrop$), the heuristic prunes the diagonal $k$ as it is unlikely to lead to the optimum alignment. If all the diagonals are pruned under this criterion, the alignment process is abandoned.
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_xdrop;
attributes.heuristic.xdrop = 100;
attributes.heuristic.steps_between_cutoffs = 100;
```
**Z-drop** implements the Z-drop heuristic (as described in Minimap2). This heuristic halts the alignment process if the score drops too fast in the diagonal direction. Let $sw_{max}$ be the maximum observed score so far, computed at cell ($i'$,$j'$). Then, let $sw$ be the maximum score found in the last computed wavefront, computed at cell ($i$,$j$). The Z-drop heuristic stops the alignment process if $sw_{max} - sw > zdrop + gap_e·|(i-i')-(j-j')|$, being $gap_e$ the gap-extension penalty and $zdrop$ a parameter of the heuristic.
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_zdrop;
attributes.heuristic.zdrop = 100;
attributes.heuristic.steps_between_cutoffs = 100;
```
**Graphical examples:**
None |
X-drop(200,1) |
Z-drop(200,1) |
 |
 |
 |
- **Heuristic bands.** These heuristics set a band in the diagonals preventing the wavefront from growing beyond those limits. It allows setting the minimum diagonal (i.e., min_k) and maximum diagonal (i.e., max_k). These heuristics are the most restrictive but the fastest and simplest to compute. In this category, WFA2 implements 'static-band' and 'adaptive-band'.
**Static-band** sets a fixed band in the diagonals preventing the wavefront from growing beyond those limits. It allows setting the minimum diagonal (i.e., min_k) and maximum diagonal (i.e., max_k).
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_banded_static;
attributes.heuristic.min_k = -10;
attributes.heuristic.max_k = +10;
```
**Adaptive-band** is similar to the static-band heuristic; however, it allows the band to move towards the diagonals closer to the end of the alignment. Unlike the static-band that is performed on each step, the adaptive-band heuristics allows configuring the number of steps between heuristic band cut-offs.
```C
wavefront_aligner_attr_t attributes = wavefront_aligner_attr_default;
attributes.heuristic.strategy = wf_heuristic_banded_adaptive;
attributes.heuristic.min_k = -10;
attributes.heuristic.max_k = +10;
attributes.heuristic.steps_between_cutoffs = 1;
```
**Graphical examples:**
Banded(10,10) |
Banded(10,150) |
 |
 |
Adaptive-Band(10,10,1) |
Adaptive-Band(50,50,1) |
 |
 |
### 3.6 Some technical notes
- Thanks to Eizenga's formulation, WFA2-lib can operate with any match score. Although, in practice, M=0 is still the most efficient choice.
- Note that edit and LCS are distance metrics and, thus, the score computed is always positive. However, weighted distances, like gap-linear and gap-affine, have the sign of the computed alignment evaluated under the selected penalties. If WFA2-lib is executed using $M=0$, the final score is expected to be negative.
- All WFA2-lib algorithms/variants are stable, with one exception noted below. That is, for alignments having the same score, the library always resolves ties (between M, X, I, and D) using the same criteria: M (highest prio) > X > D > I (lowest prio). The exception is the memory mode `ultralow` (BiWFA), which is optimal (always reports the best alignment) but not stable.
## 4. REPORTING BUGS AND FEATURE REQUEST
Feedback and bug reports are highly appreciated. Please report any issue or suggestion on GitHub or by email to the main developer (santiagomsola@gmail.com). Don't hesitate to contact us
if:
- You experience any bug or crash.
- You want to request a feature or have any suggestion.
- Your application using the library is running slower than it should or you expected.
- You need help integrating the library into your tool.
## 5. LICENSE
WFA2-lib is distributed under MIT licence.
## 6. AUTHORS
[Santiago Marco-Sola](https://github.com/smarco) (santiagomsola@gmail.com) is the main developer and the person to whom you should address your complaints.
[Andrea Guarracino](https://github.com/AndreaGuarracino) and [Erik Garrison](https://github.com/ekg) have contributed to the design of new features and intensive testing of the library.
[Pjotr Prins](https://thebird.nl/) contributed the CMake build system, the prevention of leaking variables in include headers, and other tweaks.
Miquel Moretó has contributed with fruitful technical discussions and tireless efforts seeking funding, so we could keep working on this project.
## 7. ACKNOWLEDGEMENTS
- Baoxing Song and Buckler's lab for their interest and help promoting the WFA and pushing for the inclusion of new features.
- Juan Carlos Moure and Antonio Espinosa for their collaboration and support of this project.
## 8. CITATION
**Santiago Marco-Sola, Juan Carlos Moure, Miquel Moreto, Antonio Espinosa**. ["Fast gap-affine pairwise alignment using the wavefront algorithm."](https://doi.org/10.1093/bioinformatics/btaa777) Bioinformatics, 2020.
**Santiago Marco-Sola, Jordan M Eizenga, Andrea Guarracino, Benedict Paten, Erik Garrison, Miquel Moreto**. Optimal gap-affine alignment in O(s) space. _bioRxiv_ (2022). DOI [2022.04.14.488380](https://doi.org/10.1101/2022.04.14.488380)
WFA2-lib-2.3.3/VERSION 0000664 0000000 0000000 00000000005 14371523677 0014044 0 ustar 00root root 0000000 0000000 v2.3
WFA2-lib-2.3.3/alignment/ 0000775 0000000 0000000 00000000000 14371523677 0014757 5 ustar 00root root 0000000 0000000 WFA2-lib-2.3.3/alignment/Makefile 0000664 0000000 0000000 00000001522 14371523677 0016417 0 ustar 00root root 0000000 0000000 ###############################################################################
# Definitions
###############################################################################
# FOLDER_ROOT is passed to the compiler as an include path (-I) so that
# project-relative includes such as "alignment/cigar.h" resolve.
FOLDER_ROOT=..
# FOLDER_BUILD is the directory that receives the object files.
FOLDER_BUILD=../build
###############################################################################
# Modules
###############################################################################
# One entry per translation unit of this folder (without the .c suffix)
MODULES=affine_penalties \
        affine2p_penalties \
        cigar \
        score_matrix
SRCS=$(addsuffix .c, $(MODULES))
OBJS=$(addprefix $(FOLDER_BUILD)/, $(SRCS:.c=.o))
###############################################################################
# Rules
###############################################################################
# 'all' names no file on disk; declare it phony so a stray file called 'all'
# can never make the target look up to date.
.PHONY: all
all: $(OBJS)
# General building rule (the recipe line MUST start with a TAB character)
$(FOLDER_BUILD)/%.o : %.c
	$(CC) $(CC_FLAGS) -I$(FOLDER_ROOT) -c $< -o $@
WFA2-lib-2.3.3/alignment/affine2p_penalties.c 0000664 0000000 0000000 00000002713 14371523677 0020664 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Gap-Affine 2-Pieces penalties
*/
#include "affine2p_penalties.h"
WFA2-lib-2.3.3/alignment/affine2p_penalties.h 0000664 0000000 0000000 00000004361 14371523677 0020672 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Gap-Affine 2-Pieces penalties
*/
#ifndef AFFINE2P_PENALTIES_H_
#define AFFINE2P_PENALTIES_H_
/*
 * Affine 2-piece penalties
 *   Penalty set for the gap-affine 2-piece model: besides match/mismatch it
 *   carries two independent (gap-opening, gap-extension) pairs. All values are
 *   stored in penalty representation, as noted on each field.
 */
typedef struct {
int match; // (Penalty representation; usually M <= 0)
int mismatch; // (Penalty representation; usually X > 0)
// Usually concave; O1 + E1 < O2 + E2 and E1 > E2 (O = gap opening, E = gap extension).
int gap_opening1; // (Penalty representation; usually O1 > 0)
int gap_extension1; // (Penalty representation; usually E1 > 0)
int gap_opening2; // (Penalty representation; usually O2 > 0)
int gap_extension2; // (Penalty representation; usually E2 > 0)
} affine2p_penalties_t;
/*
 * Affine 2-piece matrix-type (for backtrace)
 *   Identifies one of the five matrices of the 2-piece model: M, plus an
 *   I and a D matrix for each of the two gap pieces (suffix 1 and 2).
 */
typedef enum {
affine2p_matrix_M,
affine2p_matrix_I1,
affine2p_matrix_I2,
affine2p_matrix_D1,
affine2p_matrix_D2
} affine2p_matrix_type;
#endif /* AFFINE2P_PENALTIES_H_ */
WFA2-lib-2.3.3/alignment/affine_penalties.c 0000664 0000000 0000000 00000002712 14371523677 0020421 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Gap-Affine penalties
*/
#include "alignment/affine_penalties.h"
WFA2-lib-2.3.3/alignment/affine_penalties.h 0000664 0000000 0000000 00000003737 14371523677 0020436 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Gap-Affine penalties
*/
#ifndef AFFINE_PENALTIES_H_
#define AFFINE_PENALTIES_H_
/*
 * Affine penalties
 *   Penalty set for the gap-affine model: match, mismatch, and a single
 *   (gap-opening, gap-extension) pair. All values are stored in penalty
 *   representation, as noted on each field.
 */
typedef struct {
int match; // (Penalty representation; usually M <= 0)
int mismatch; // (Penalty representation; usually X > 0)
int gap_opening; // (Penalty representation; usually O > 0)
int gap_extension; // (Penalty representation; usually E > 0)
} affine_penalties_t;
/*
 * Affine matrix-type (for backtrace)
 *   Identifies one of the three matrices of the gap-affine model (M, I, D).
 */
typedef enum {
affine_matrix_M,
affine_matrix_I,
affine_matrix_D,
} affine_matrix_type;
#endif /* AFFINE_PENALTIES_H_ */
WFA2-lib-2.3.3/alignment/cigar.c 0000664 0000000 0000000 00000036064 14371523677 0016221 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Edit cigar data-structure (match/mismatch/insertion/deletion)
*/
#include "utils/commons.h"
#include "cigar.h"
/*
* Setup
*/
/*
 * Creates an empty CIGAR whose operations buffer is requested with
 * @max_operations from @mm_allocator. Offsets start at zero and the score is
 * initialised to INT32_MIN. The allocator is remembered inside the CIGAR so
 * that later resize/free calls use the same one.
 */
cigar_t* cigar_new(
    const int max_operations,
    mm_allocator_t* const mm_allocator) {
  // Handler
  cigar_t* const new_cigar = mm_allocator_alloc(mm_allocator,cigar_t);
  // Keep track of the allocator and the requested capacity
  new_cigar->mm_allocator = mm_allocator;
  new_cigar->max_operations = max_operations;
  // Empty CIGAR, no score computed yet
  new_cigar->score = INT32_MIN;
  new_cigar->end_offset = 0;
  new_cigar->begin_offset = 0;
  // Operations buffer
  new_cigar->operations = mm_allocator_malloc(mm_allocator,max_operations);
  return new_cigar;
}
/*
 * Resets @cigar to the empty state (both offsets at zero, score INT32_MIN).
 * The operations buffer itself is left untouched.
 */
void cigar_clear(
    cigar_t* const cigar) {
  // Forget any previous score
  cigar->score = INT32_MIN;
  // Empty range of operations
  cigar->end_offset = 0;
  cigar->begin_offset = 0;
}
/*
 * Makes sure the operations buffer was requested with at least
 * @max_operations and clears the CIGAR (offsets at zero, score INT32_MIN).
 * When the buffer has to grow it is freed and allocated anew; the previous
 * operations are not copied over.
 */
void cigar_resize(
    cigar_t* const cigar,
    const int max_operations) {
  // Grow the buffer only if the current one is too small
  if (cigar->max_operations < max_operations) {
    mm_allocator_t* const allocator = cigar->mm_allocator;
    mm_allocator_free(allocator,cigar->operations); // Release old buffer
    cigar->operations = mm_allocator_malloc(allocator,max_operations); // New buffer
    cigar->max_operations = max_operations;
  }
  // Clear
  cigar->score = INT32_MIN;
  cigar->end_offset = 0;
  cigar->begin_offset = 0;
}
/*
 * Releases the operations buffer and then the CIGAR handler itself, both
 * through the allocator stored in the CIGAR.
 */
void cigar_free(
    cigar_t* const cigar) {
  // Fetch the allocator first; the handler is gone after the second free
  mm_allocator_t* const allocator = cigar->mm_allocator;
  mm_allocator_free(allocator,cigar->operations);
  mm_allocator_free(allocator,cigar);
}
/*
* Accessors
*/
/*
 * Returns the number of 'M' operations stored in the range
 * [begin_offset,end_offset) of @cigar.
 */
int cigar_get_matches(
    cigar_t* const cigar) {
  int i, num_matches=0;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    num_matches += (cigar->operations[i]=='M');
  }
  return num_matches;
}
/*
 * Refines a CIGAR in place against the actual sequences:
 *   - every 'M' is re-labelled 'M' or 'X' depending on whether the pattern and
 *     text characters it covers are equal;
 *   - traversal stops as soon as either sequence is exhausted;
 *   - the unconsumed pattern tail is appended as 'D' and the unconsumed text
 *     tail as 'I';
 *   - end_offset is moved to the new end and a '\0' is written right after it.
 * Any operation other than M/I/D aborts the program.
 *
 * NOTE(review): the tail appends and the final '\0' are written without
 * checking max_operations; the caller presumably guarantees enough room in the
 * operations buffer -- confirm.
 */
void cigar_add_mismatches(
    char* const pattern,
    const int pattern_length,
    char* const text,
    const int text_length,
    cigar_t* const cigar) {
  // Refine adding mismatches
  int i, p=0, t=0;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    // Check limits
    if (p >= pattern_length || t >= text_length) break;
    switch (cigar->operations[i]) {
      case 'M':
        cigar->operations[i] = (pattern[p]==text[t]) ? 'M' : 'X';
        ++p; ++t;
        break;
      case 'I':
        ++t;
        break;
      case 'D':
        ++p;
        break;
      default:
        fprintf(stderr,"[CIGAR] Wrong edit operation\n");
        exit(1);
        break;
    }
  }
  // Account for whatever is left of each sequence (overwrites from position i)
  while (p < pattern_length) { cigar->operations[i++] = 'D'; ++p; }
  while (t < text_length) { cigar->operations[i++] = 'I'; ++t; }
  // Close the CIGAR
  cigar->end_offset = i;
  cigar->operations[cigar->end_offset] = '\0';
}
/*
* Score
*/
/*
 * Edit score of @cigar: one unit per 'X', 'D' or 'I' operation, zero per 'M'.
 * Returns INT_MIN if an unknown operation is found.
 */
int cigar_score_edit(
    cigar_t* const cigar) {
  int score = 0, i;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    switch (cigar->operations[i]) {
      case 'M': break;
      case 'X':
      case 'D':
      case 'I': ++score; break;
      default: return INT_MIN;
    }
  }
  return score;
}
/*
 * Gap-linear score of @cigar under @penalties. Each operation subtracts its
 * penalty (match, mismatch, or indel for both 'I' and 'D') from the score.
 * Returns INT_MIN if an unknown operation is found.
 */
int cigar_score_gap_linear(
    cigar_t* const cigar,
    linear_penalties_t* const penalties) {
  int score = 0, i;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    switch (cigar->operations[i]) {
      case 'M': score -= penalties->match; break;
      case 'X': score -= penalties->mismatch; break;
      case 'I': score -= penalties->indel; break;
      case 'D': score -= penalties->indel; break;
      default: return INT_MIN;
    }
  }
  return score;
}
/*
 * Gap-affine score of @cigar under @penalties. Matches and mismatches subtract
 * their penalty; every 'I'/'D' subtracts the gap-extension penalty, plus the
 * gap-opening penalty when the previous operation was not the same kind of gap.
 * An unknown operation aborts the program.
 */
int cigar_score_gap_affine(
    cigar_t* const cigar,
    affine_penalties_t* const penalties) {
  char last_op = '\0';
  int score = 0, i;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    switch (cigar->operations[i]) {
      case 'M':
        score -= penalties->match;
        break;
      case 'X':
        score -= penalties->mismatch;
        break;
      case 'D':
        score -= penalties->gap_extension + ((last_op=='D') ? 0 : penalties->gap_opening);
        break;
      case 'I':
        score -= penalties->gap_extension + ((last_op=='I') ? 0 : penalties->gap_opening);
        break;
      default:
        fprintf(stderr,"[CIGAR] Computing CIGAR score: Unknown operation\n");
        exit(1);
    }
    // Remember the operation to detect gap openings
    last_op = cigar->operations[i];
  }
  return score;
}
/*
 * Penalty of a run of @length identical operations under the gap-affine
 * 2-piece model:
 *   'M'     -> match * length
 *   'X'     -> mismatch * length
 *   'D','I' -> the cheaper of (O1 + E1*length) and (O2 + E2*length)
 * Any other operation aborts the program.
 */
int cigar_score_gap_affine2p_get_operations_score(
    const char operation,
    const int length,
    affine2p_penalties_t* const penalties) {
  int run_penalty = 0;
  switch (operation) {
    case 'M':
      run_penalty = length * penalties->match;
      break;
    case 'X':
      run_penalty = length * penalties->mismatch;
      break;
    case 'I':
    case 'D': {
      // Evaluate both pieces and keep the cheapest
      const int piece1 = penalties->gap_opening1 + penalties->gap_extension1*length;
      const int piece2 = penalties->gap_opening2 + penalties->gap_extension2*length;
      run_penalty = MIN(piece1,piece2);
      break;
    }
    default:
      fprintf(stderr,"[CIGAR] Computing CIGAR score: Unknown operation\n");
      exit(1);
  }
  return run_penalty;
}
/*
 * Gap-affine 2-piece score of @cigar under @penalties. The CIGAR is split into
 * runs of identical operations and the penalty of each run (see
 * cigar_score_gap_affine2p_get_operations_score) is subtracted from the score.
 * An empty CIGAR scores 0.
 */
int cigar_score_gap_affine2p(
    cigar_t* const cigar,
    affine2p_penalties_t* const penalties) {
  char last_op = '\0';
  int score = 0, op_length = 0;
  int i;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    // Account for operation (a run ends whenever the operation changes)
    if (cigar->operations[i] != last_op && last_op != '\0') {
      score -= cigar_score_gap_affine2p_get_operations_score(last_op,op_length,penalties);
      op_length = 0;
    }
    // Add operation
    last_op = cigar->operations[i];
    ++op_length;
  }
  // Account for last operation (there is none if the CIGAR is empty;
  // scoring the '\0' placeholder would abort with "Unknown operation")
  if (last_op != '\0') {
    score -= cigar_score_gap_affine2p_get_operations_score(last_op,op_length,penalties);
  }
  return score;
}
/*
* Utils
*/
/*
 * Three-way comparison of two CIGARs. CIGARs of different length compare by
 * length difference; CIGARs of equal length compare by the first differing
 * operation. Returns 0 when both hold the same operations.
 */
int cigar_cmp(
    cigar_t* const cigar_a,
    cigar_t* const cigar_b) {
  // Compare lengths
  const int length_cigar_a = cigar_a->end_offset - cigar_a->begin_offset;
  const int length_cigar_b = cigar_b->end_offset - cigar_b->begin_offset;
  if (length_cigar_a != length_cigar_b) return length_cigar_a - length_cigar_b;
  // Compare operations
  char* const operations_a = cigar_a->operations + cigar_a->begin_offset;
  char* const operations_b = cigar_b->operations + cigar_b->begin_offset;
  int i;
  for (i=0;i<length_cigar_a;++i) {
    if (operations_a[i] != operations_b[i]) {
      return operations_a[i] - operations_b[i];
    }
  }
  // Equal
  return 0;
}
/*
 * Copies capacity, offsets, score and the operations in
 * [begin_offset,end_offset) from @cigar_src into @cigar_dst, keeping the
 * operations at the same offsets.
 *
 * NOTE(review): the destination buffer is not resized here although
 * max_operations is overwritten; the caller presumably guarantees that
 * @cigar_dst already holds a buffer at least as large as the source's --
 * confirm.
 */
void cigar_copy(
    cigar_t* const cigar_dst,
    cigar_t* const cigar_src) {
  // Copy
  cigar_dst->max_operations = cigar_src->max_operations;
  cigar_dst->begin_offset = cigar_src->begin_offset;
  cigar_dst->end_offset = cigar_src->end_offset;
  cigar_dst->score = cigar_src->score;
  memcpy(cigar_dst->operations+cigar_src->begin_offset,
         cigar_src->operations+cigar_src->begin_offset,
         cigar_src->end_offset-cigar_src->begin_offset);
}
/*
 * Appends the operations of @cigar_src (its [begin_offset,end_offset) range)
 * at the end of @cigar_dst and advances the destination's end_offset
 * accordingly. No capacity check is performed here.
 */
void cigar_append(
    cigar_t* const cigar_dst,
    cigar_t* const cigar_src) {
  // Number of operations to bring over
  const int num_operations = cigar_src->end_offset - cigar_src->begin_offset;
  // Copy them right after the destination's last operation
  memcpy(
      cigar_dst->operations + cigar_dst->end_offset,
      cigar_src->operations + cigar_src->begin_offset,
      num_operations);
  // Move the end of the destination
  cigar_dst->end_offset = cigar_dst->end_offset + num_operations;
}
/*
 * Appends @length deletions ('D') at the end of @cigar and advances
 * end_offset. No capacity check is performed here.
 */
void cigar_append_deletion(
    cigar_t* const cigar,
    const int length) {
  // Append deletions
  char* const operations = cigar->operations + cigar->end_offset;
  int i;
  for (i=0;i<length;++i) {
    operations[i] = 'D';
  }
  // Update offset
  cigar->end_offset += length;
}
/*
 * Appends @length insertions ('I') at the end of @cigar and advances
 * end_offset. No capacity check is performed here.
 */
void cigar_append_insertion(
    cigar_t* const cigar,
    const int length) {
  // Append insertions
  char* const operations = cigar->operations + cigar->end_offset;
  int i;
  for (i=0;i<length;++i) {
    operations[i] = 'I';
  }
  // Update offset
  cigar->end_offset += length;
}
/*
 * Verifies that @cigar is a valid alignment of @pattern against @text:
 *   - every 'M' must cover equal characters and every 'X' different ones;
 *   - after the last operation, exactly @pattern_length pattern characters and
 *     @text_length text characters must have been consumed.
 * Returns true if the CIGAR is consistent, false otherwise. When @verbose is
 * set, the reason for the failure is written to @stream. An unknown operation
 * aborts the program.
 *
 * NOTE(review): positions are not range-checked while traversing, so a CIGAR
 * that consumes more characters than the sequences hold reads past
 * pattern_length/text_length -- presumably callers pass well-formed CIGARs.
 */
bool cigar_check_alignment(
    FILE* const stream,
    const char* const pattern,
    const int pattern_length,
    const char* const text,
    const int text_length,
    cigar_t* const cigar,
    const bool verbose) {
  // Parameters
  char* const operations = cigar->operations;
  // Traverse CIGAR
  int pattern_pos=0, text_pos=0, i;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    switch (operations[i]) {
      case 'M':
        // Check match
        if (pattern[pattern_pos] != text[text_pos]) {
          if (verbose) {
            fprintf(stream,
                "[AlignCheck] Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n",
                pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]);
          }
          return false;
        }
        ++pattern_pos;
        ++text_pos;
        break;
      case 'X':
        // Check mismatch
        if (pattern[pattern_pos] == text[text_pos]) {
          if (verbose) {
            fprintf(stream,
                "[AlignCheck] Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n",
                pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]);
          }
          return false;
        }
        ++pattern_pos;
        ++text_pos;
        break;
      case 'I':
        ++text_pos;
        break;
      case 'D':
        ++pattern_pos;
        break;
      default:
        fprintf(stderr,"[AlignCheck] Unknown edit operation '%c'\n",operations[i]);
        exit(1);
        break;
    }
  }
  // Check alignment length
  if (pattern_pos != pattern_length) {
    if (verbose) {
      fprintf(stream,
          "[AlignCheck] Alignment incorrect length (pattern-aligned=%d,pattern-length=%d)\n",
          pattern_pos,pattern_length);
    }
    return false;
  }
  if (text_pos != text_length) {
    if (verbose) {
      fprintf(stream,
          "[AlignCheck] Alignment incorrect length (text-aligned=%d,text-length=%d)\n",
          text_pos,text_length);
    }
    return false;
  }
  // OK
  return true;
}
/*
* Display
*/
/*
 * Writes @cigar to @stream in run-length form ("<length><op>" per run of
 * identical operations). Runs of 'M' are skipped unless @print_matches is set.
 * Nothing is written for an empty CIGAR.
 */
void cigar_print(
    FILE* const stream,
    cigar_t* const cigar,
    const bool print_matches) {
  // Check null CIGAR
  if (cigar->begin_offset >= cigar->end_offset) return;
  // Print operations
  char last_op = cigar->operations[cigar->begin_offset];
  int last_op_length = 1;
  int i;
  for (i=cigar->begin_offset+1;i<cigar->end_offset;++i) {
    if (cigar->operations[i]==last_op) {
      ++last_op_length;
    } else {
      // The run ended; flush it and start a new one
      if (print_matches || last_op != 'M') {
        fprintf(stream,"%d%c",last_op_length,last_op);
      }
      last_op = cigar->operations[i];
      last_op_length = 1;
    }
  }
  // Flush the last run
  if (print_matches || last_op != 'M') {
    fprintf(stream,"%d%c",last_op_length,last_op);
  }
}
/*
 * Writes @cigar into @buffer in run-length form ("<length><op>" per run of
 * identical operations), always terminating the string with '\0'. Runs of 'M'
 * are skipped unless @print_matches is set.
 * Returns the number of characters written, not counting the terminator.
 *
 * NOTE(review): the buffer size is not known here (sprintf is unbounded), so
 * the caller has to provide a buffer large enough for the whole CIGAR.
 */
int cigar_sprint(
    char* buffer,
    cigar_t* const cigar,
    const bool print_matches) {
  // Parameters
  int pos = 0;
  // Check null CIGAR
  if (cigar->begin_offset >= cigar->end_offset) {
    buffer[pos] = '\0';
    return pos;
  }
  // Print operations
  char last_op = cigar->operations[cigar->begin_offset];
  int last_op_length = 1;
  int i;
  for (i=cigar->begin_offset+1;i<cigar->end_offset;++i) {
    if (cigar->operations[i]==last_op) {
      ++last_op_length;
    } else {
      // The run ended; flush it and start a new one
      if (print_matches || last_op != 'M') {
        pos += sprintf(buffer+pos,"%d%c",last_op_length,last_op);
      }
      last_op = cigar->operations[i];
      last_op_length = 1;
    }
  }
  // Flush the last run
  if (print_matches || last_op != 'M') {
    pos += sprintf(buffer+pos,"%d%c",last_op_length,last_op);
  }
  // Return
  buffer[pos] = '\0';
  return pos;
}
/*
 * Pretty-prints the alignment described by @cigar to @stream: the CIGAR in
 * full and compact run-length form, followed by three rows (pattern, operation
 * marks, text) laid out column by column.
 *   - 'M' columns are marked '|' when the characters are equal and 'X' when
 *     they are not (i.e. the CIGAR is wrong at that column);
 *   - 'X' columns are marked ' ' when the characters differ and 'X' when they
 *     are actually equal;
 *   - 'I'/'D' columns show '-' on the gapped sequence;
 *   - characters of either sequence not covered by the CIGAR are appended at
 *     the end and marked '?'.
 * Temporary row buffers are taken from (and returned to) @mm_allocator.
 */
void cigar_print_pretty(
    FILE* const stream,
    const char* const pattern,
    const int pattern_length,
    const char* const text,
    const int text_length,
    cigar_t* const cigar,
    mm_allocator_t* const mm_allocator) {
  // Parameters
  char* const operations = cigar->operations;
  // Allocate alignment buffers (zero-initialised, so rows are always terminated)
  const int max_buffer_length = text_length+pattern_length+1;
  char* const pattern_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
  char* const ops_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
  char* const text_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
  // Compute alignment buffers
  int i, alg_pos = 0, pattern_pos = 0, text_pos = 0;
  for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
    switch (operations[i]) {
      case 'M':
        if (pattern[pattern_pos] != text[text_pos]) {
          pattern_alg[alg_pos] = pattern[pattern_pos];
          ops_alg[alg_pos] = 'X';
          text_alg[alg_pos++] = text[text_pos];
        } else {
          pattern_alg[alg_pos] = pattern[pattern_pos];
          ops_alg[alg_pos] = '|';
          text_alg[alg_pos++] = text[text_pos];
        }
        pattern_pos++; text_pos++;
        break;
      case 'X':
        if (pattern[pattern_pos] != text[text_pos]) {
          pattern_alg[alg_pos] = pattern[pattern_pos++];
          ops_alg[alg_pos] = ' ';
          text_alg[alg_pos++] = text[text_pos++];
        } else {
          pattern_alg[alg_pos] = pattern[pattern_pos++];
          ops_alg[alg_pos] = 'X';
          text_alg[alg_pos++] = text[text_pos++];
        }
        break;
      case 'I':
        pattern_alg[alg_pos] = '-';
        ops_alg[alg_pos] = ' ';
        text_alg[alg_pos++] = text[text_pos++];
        break;
      case 'D':
        pattern_alg[alg_pos] = pattern[pattern_pos++];
        ops_alg[alg_pos] = ' ';
        text_alg[alg_pos++] = '-';
        break;
      default:
        break;
    }
  }
  // Append the unaligned tail of the pattern (if any)
  i=0;
  while (pattern_pos < pattern_length) {
    pattern_alg[alg_pos+i] = pattern[pattern_pos++];
    ops_alg[alg_pos+i] = '?';
    ++i;
  }
  // Append the unaligned tail of the text (if any)
  i=0;
  while (text_pos < text_length) {
    text_alg[alg_pos+i] = text[text_pos++];
    ops_alg[alg_pos+i] = '?';
    ++i;
  }
  // Print alignment pretty (everything goes to the requested stream)
  fprintf(stream," ALIGNMENT\t");
  cigar_print(stream,cigar,true);
  fprintf(stream,"\n");
  fprintf(stream," ALIGNMENT.COMPACT\t");
  cigar_print(stream,cigar,false);
  fprintf(stream,"\n");
  // The three prefixes have the same width so that the rows line up
  fprintf(stream,"      PATTERN    %s\n",pattern_alg);
  fprintf(stream,"                 %s\n",ops_alg);
  fprintf(stream,"      TEXT       %s\n",text_alg);
  // Free
  mm_allocator_free(mm_allocator,pattern_alg);
  mm_allocator_free(mm_allocator,ops_alg);
  mm_allocator_free(mm_allocator,text_alg);
}
WFA2-lib-2.3.3/alignment/cigar.h 0000664 0000000 0000000 00000007364 14371523677 0016227 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Cigar data-structure (match/mismatch/insertion/deletion)
*/
#ifndef CIGAR_H_
#define CIGAR_H_
#include "system/mm_allocator.h"
#include "alignment/linear_penalties.h"
#include "alignment/affine_penalties.h"
#include "alignment/affine2p_penalties.h"
/*
* CIGAR
*/
// CIGAR container: a buffer of per-position alignment operations, the offsets
// delimiting the portion in use, the alignment score, and the allocator
// handle stored alongside them.
typedef struct {
// Operations buffer
// One char per operation; cigar_print_pretty() interprets 'M', 'X', 'I' and 'D'
// and ignores any other value.
char* operations;
// presumably the capacity of `operations` -- TODO confirm against cigar_new()/cigar_resize()
int max_operations;
// Index of the first operation read when cigar_print_pretty() walks the buffer
int begin_offset;
// presumably one past the last operation in use -- TODO confirm (half-open range)
int end_offset;
// Score
int score;
// MM
mm_allocator_t* mm_allocator;
} cigar_t;
/*
* Setup
*/
// Returns a cigar_t pointer built from `max_operations` and `mm_allocator`.
// NOTE(review): the implementation is not visible from this header; presumably
// the operations buffer is sized by `max_operations` -- confirm in cigar.c.
cigar_t* cigar_new(
const int max_operations,
mm_allocator_t* const mm_allocator);
// NOTE(review): presumably resets the CIGAR to an empty state -- confirm in cigar.c.
void cigar_clear(
cigar_t* const cigar);
// NOTE(review): presumably changes the operations capacity to `max_operations`;
// whether existing operations are preserved cannot be told from here.
void cigar_resize(
cigar_t* const cigar,
const int max_operations);
// NOTE(review): presumably releases what cigar_new() acquired -- confirm in cigar.c.
void cigar_free(
cigar_t* const cigar);
/*
* Accessors
*/
// Returns an int derived from `cigar`.
// NOTE(review): presumably the number of match operations -- confirm in cigar.c.
int cigar_get_matches(
cigar_t* const cigar);
// Takes both sequences (with their lengths) and the CIGAR to update.
// NOTE(review): presumably compares `pattern` against `text` along the CIGAR and
// marks mismatching positions; the sequences are passed as non-const char*, so
// verify in cigar.c whether they are modified.
void cigar_add_mismatches(
char* const pattern,
const int pattern_length,
char* const text,
const int text_length,
cigar_t* const cigar);
/*
* Score
*/
// Each function below returns an int score for `cigar`; the variants differ in
// the penalties structure they take. Implementations are not visible here.
// NOTE(review): sign convention of the returned score is not established by this
// header -- confirm in cigar.c.
int cigar_score_edit(
cigar_t* const cigar);
// Uses linear_penalties_t (match / mismatch / indel).
int cigar_score_gap_linear(
cigar_t* const cigar,
linear_penalties_t* const penalties);
// Uses affine_penalties_t (declared in alignment/affine_penalties.h).
int cigar_score_gap_affine(
cigar_t* const cigar,
affine_penalties_t* const penalties);
// Uses affine2p_penalties_t (declared in alignment/affine2p_penalties.h).
int cigar_score_gap_affine2p(
cigar_t* const cigar,
affine2p_penalties_t* const penalties);
/*
* Utils
*/
// Compares two CIGARs and returns an int.
// NOTE(review): presumably 0 means equal (strcmp-like) -- confirm in cigar.c.
int cigar_cmp(
cigar_t* const cigar_a,
cigar_t* const cigar_b);
// NOTE(review): presumably copies `cigar_src` into `cigar_dst`; whether the
// destination buffer is grown as needed is not visible from here.
void cigar_copy(
cigar_t* const cigar_dst,
cigar_t* const cigar_src);
// NOTE(review): presumably appends the operations of `cigar_src` after those
// already in `cigar_dst` -- confirm in cigar.c.
void cigar_append(
cigar_t* const cigar_dst,
cigar_t* const cigar_src);
// NOTE(review): presumably appends `length` deletion operations -- confirm.
void cigar_append_deletion(
cigar_t* const cigar,
const int length);
// NOTE(review): presumably appends `length` insertion operations -- confirm.
void cigar_append_insertion(
cigar_t* const cigar,
const int length);
// Takes a stream, both sequences with their lengths, the CIGAR and a `verbose`
// flag; returns a bool.
// NOTE(review): presumably true means the CIGAR is consistent with the two
// sequences, with diagnostics written to `stream` when `verbose` -- confirm.
bool cigar_check_alignment(
FILE* const stream,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
cigar_t* const cigar,
const bool verbose);
/*
* Display
*/
// Prints `cigar` to `stream`.
// cigar_print_pretty() calls this with print_matches=true under the label
// "ALIGNMENT" and with print_matches=false under "ALIGNMENT.COMPACT".
// NOTE(review): the exact output format is defined in cigar.c, not visible here.
void cigar_print(
FILE* const stream,
cigar_t* const cigar,
const bool print_matches);
// String-buffer counterpart taking a caller-provided `buffer`; returns an int.
// NOTE(review): presumably the number of characters written; the required
// buffer size is not stated in this header -- confirm in cigar.c.
int cigar_sprint(
char* buffer,
cigar_t* const cigar,
const bool print_matches);
// Prints a three-row, human-readable rendering of the alignment:
// a PATTERN row, an operations row and a TEXT row, preceded by
// "ALIGNMENT" and "ALIGNMENT.COMPACT" labels.
//  - Walks the CIGAR operations starting at cigar->begin_offset; 'M', 'X',
//    'I' and 'D' are rendered, any other operation char is ignored.
//  - Pattern/text characters not consumed by the CIGAR are appended and
//    flagged with '?' in the operations row.
//  - Three temporary buffers of (text_length+pattern_length+1) chars are taken
//    from `mm_allocator` and freed before returning.
// NOTE(review): in the implementation the labels and the three rows go to
// `stream`, but the two cigar_print() calls are given stderr, so the CIGAR
// strings do not land on `stream` -- confirm whether that is intended.
void cigar_print_pretty(
FILE* const stream,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
cigar_t* const cigar,
mm_allocator_t* const mm_allocator);
#endif /* CIGAR_H_ */
WFA2-lib-2.3.3/alignment/linear_penalties.h 0000664 0000000 0000000 00000003347 14371523677 0020455 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Gap-linear penalties
*/
#ifndef LINEAR_PENALTIES_H_
#define LINEAR_PENALTIES_H_
// Gap-linear penalty set: one value each for a match, a mismatch and an indel.
// All values use the penalty representation described on each field.
typedef struct {
int match; // (Penalty representation; usually M <= 0)
int mismatch; // (Penalty representation; usually X > 0)
int indel; // (Penalty representation; usually I > 0)
} linear_penalties_t;
#endif /* LINEAR_PENALTIES_H_ */
WFA2-lib-2.3.3/alignment/score_matrix.c 0000664 0000000 0000000 00000007232 14371523677 0017626 0 ustar 00root root 0000000 0000000 /*
* The MIT License
*
* Wavefront Alignment Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola
*
* This file is part of Wavefront Alignment Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignment Algorithms
* AUTHOR(S): Santiago Marco-Sola
* DESCRIPTION: Score matrix for alignment using dynamic programming
*/
#include "utils/commons.h"
#include "score_matrix.h"
/*
* Setup
*/
/*
 * Allocates the DP matrix of @score_matrix as an array of @num_columns column
 * pointers, each pointing to a buffer of @num_rows ints. All memory is taken
 * from @mm_allocator, which is also stored in the matrix so that
 * score_matrix_free() can release it later.
 *
 *   @score_matrix  Matrix descriptor to fill in
 *   @num_rows      Number of ints per column
 *   @num_columns   Number of columns
 *   @mm_allocator  Allocator used for every buffer
 *
 * NOTE(review): the column buffers are requested with the last calloc argument
 * set to false; presumably that means the cells are not zero-initialised, so
 * callers should write each cell before reading it -- confirm in mm_allocator.h.
 */
void score_matrix_allocate(
    score_matrix_t* const score_matrix,
    const int num_rows,
    const int num_columns,
    mm_allocator_t* const mm_allocator) {
  // Allocate DP matrix
  int h;
  score_matrix->num_rows = num_rows;
  score_matrix->num_columns = num_columns;
  score_matrix->columns = mm_allocator_malloc(mm_allocator,num_columns*sizeof(int*)); // Columns
  for (h=0;h<num_columns;++h) {
    score_matrix->columns[h] = mm_allocator_calloc(mm_allocator,num_rows,int,false); // Rows
  }
  // MM
  score_matrix->mm_allocator = mm_allocator;
}
/*
 * Releases the memory acquired by score_matrix_allocate(): first every column
 * buffer, then the array of column pointers. The allocator stored in the
 * matrix is the one used for the release. The descriptor itself is not freed.
 *
 *   @score_matrix  Matrix previously set up with score_matrix_allocate()
 */
void score_matrix_free(
    score_matrix_t* const score_matrix) {
  // Parameters
  mm_allocator_t* const mm_allocator = score_matrix->mm_allocator;
  // DP matrix
  const int num_columns = score_matrix->num_columns;
  int h;
  for (h=0;h<num_columns;++h) {
    mm_allocator_free(mm_allocator,score_matrix->columns[h]);
  }
  // Column-pointer array goes last: it is still read inside the loop above
  mm_allocator_free(mm_allocator,score_matrix->columns);
}
/*
* Display
*/
/*
 * Writes one score cell to @stream.
 * Scores in [0,9999] are printed as a number (minimum width 3) surrounded by
 * spaces; anything outside that range is printed as a '*' placeholder.
 */
void score_matrix_print_score(
    FILE* const stream,
    const int score) {
  // Out-of-range values get the placeholder cell
  if (score < 0 || score >= 10000) {
    fprintf(stream," * ");
    return;
  }
  // Regular numeric cell
  fprintf(stream," %3d ",score);
}
/*
 * Writes a single character cell to @stream: the character @c with one
 * space on each side.
 */
void score_matrix_print_char(
    FILE* const stream,
    const char c) {
  // %c takes an int argument; make the promotion explicit
  const int cell = c;
  fprintf(stream," %c ",cell);
}
void score_matrix_print(
FILE* const stream,
const score_matrix_t* const score_matrix,
const char* const pattern,
const char* const text) {
// Parameters
int** const matrix = score_matrix->columns;
const int num_columns = score_matrix->num_columns;
const int num_rows = score_matrix->num_rows;
int h;
// Print Header
fprintf(stream," ");
for (h=0;h