bpp-seq-2.1.0/bpp-seq.spec000644 000000 000000 00000012157 12147656566 015312 0ustar00rootroot000000 000000 
# RPM spec file for the Bio++ Sequence library (bpp-seq).
# Two binary packages are described below: the shared library itself
# (libbpp-seq9) and the development files (libbpp-seq-devel).
%define _basename bpp-seq
%define _version 2.1.0
%define _release 1
%define _prefix /usr

URL: http://biopp.univ-montp2.fr/
Name: %{_basename}
Version: %{_version}
Release: %{_release}
License: CECILL-2.0
Vendor: The Bio++ Project
Source: http://biopp.univ-montp2.fr/repos/sources/%{_basename}-%{_version}.tar.gz
Summary: Bio++ Sequence library
Group: Development/Libraries/C and C++
Requires: bpp-core = %{_version}

BuildRoot: %{_builddir}/%{_basename}-root
BuildRequires: cmake >= 2.6.0
BuildRequires: gcc-c++ >= 4.0.0
BuildRequires: libbpp-core2 = %{_version}
BuildRequires: libbpp-core-devel = %{_version}

AutoReq: yes
AutoProv: yes

%description
This library contains utilitary and classes for bio-sequence analysis.
It is part of the Bio++ project.

%package -n libbpp-seq9
Summary: Bio++ Sequence library
Group: Development/Libraries/C and C++

%description -n libbpp-seq9
This library contains utilitary and classes for bio-sequence analysis.
It is part of the Bio++ project.

%package -n libbpp-seq-devel
Summary: Libraries, includes to develop applications with %{_basename}
Group: Development/Libraries/C and C++
Requires: libbpp-seq9 = %{_version}
Requires: libbpp-core2 = %{_version}
Requires: libbpp-core-devel = %{_version}

%description -n libbpp-seq-devel
The libbpp-seq-devel package contains the header files and static libraries for
building applications which use %{_basename}.

%prep
%setup -q

%build
# NOTE(review): CFLAGS is assigned here but not exported, and the CMake
# project is declared for C++ only, so this value does not appear to reach
# the compiler -- confirm before relying on the distribution flags.
CFLAGS="$RPM_OPT_FLAGS"
CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=%{_prefix} -DBUILD_TESTING=OFF"
# Plain '=' is the portable string comparison for [ ]; '==' is a bash
# extension and fails when the build script is run by a strict POSIX sh.
if [ %{_lib} = 'lib64' ] ; then
  CMAKE_FLAGS="$CMAKE_FLAGS -DLIB_SUFFIX=64"
fi
# CMAKE_FLAGS is deliberately left unquoted: it holds several options that
# must be split into separate arguments.
cmake $CMAKE_FLAGS .
make

%install
make DESTDIR=$RPM_BUILD_ROOT install

%clean
rm -rf $RPM_BUILD_ROOT

%post -n libbpp-seq9 -p /sbin/ldconfig

%post -n libbpp-seq-devel
# createGeneric DIR
# Writes DIR.all, a header made of one include line per *.h and *.all file
# found directly inside DIR.  Sub-directories are processed first so that
# their own .all files exist by the time the parent is listed.
# NOTE(review): the function relies on shell arrays, so the scriptlet
# interpreter must be bash-compatible.
createGeneric() {
  echo "-- Creating generic include file: $1.all"
  #Make sure we run into subdirectories first:
  dirs=()
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      dirs+=( "$file" )
    fi
  done
  # The list is expanded once, before the loop starts, so the recursive
  # calls overwriting the global 'dirs' do not disturb this iteration.
  for dir in "${dirs[@]}"
  do
    createGeneric "$dir"
  done
  #Now list all files, including newly created .all files:
  if [ -f "$1.all" ]
  then
    rm "$1.all"
  fi
  dir=`basename "$1"`
  for file in "$1"/*
  do
    if [ -f "$file" ] && ( [ "${file##*.}" = "h" ] || [ "${file##*.}" = "all" ] )
    then
      file=`basename "$file"`
      echo "#include \"$dir/$file\"" >> "$1.all"
    fi
  done;
}
# Actualize .all files
createGeneric %{_prefix}/include/Bpp
exit 0

%preun -n libbpp-seq-devel
# removeGeneric DIR
# Deletes DIR.all if present, then does the same for every sub-directory.
removeGeneric() {
  if [ -f "$1.all" ]
  then
    echo "-- Remove generic include file: $1.all"
    rm "$1.all"
  fi
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      removeGeneric "$file"
    fi
  done
}
# Actualize .all files
removeGeneric %{_prefix}/include/Bpp
exit 0

%postun -n libbpp-seq9 -p /sbin/ldconfig

%postun -n libbpp-seq-devel
# Same helper as in the post-install scriptlet: the .all files are rebuilt
# from whatever headers are still present once this package's files are gone.
createGeneric() {
  echo "-- Creating generic include file: $1.all"
  #Make sure we run into subdirectories first:
  dirs=()
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      dirs+=( "$file" )
    fi
  done
  for dir in "${dirs[@]}"
  do
    createGeneric "$dir"
  done
  #Now list all files, including newly created .all files:
  if [ -f "$1.all" ]
  then
    rm "$1.all"
  fi
  dir=`basename "$1"`
  for file in "$1"/*
  do
    if [ -f "$file" ] && ( [ "${file##*.}" = "h" ] || [ "${file##*.}" = "all" ] )
    then
      file=`basename "$file"`
      echo "#include \"$dir/$file\"" >> "$1.all"
    fi
  done;
}
# Actualize .all files
createGeneric %{_prefix}/include/Bpp
exit 0

%files -n libbpp-seq9
%defattr(-,root,root)
%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
%{_prefix}/%{_lib}/lib*.so.*

%files -n libbpp-seq-devel
%defattr(-,root,root)
%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
%{_prefix}/%{_lib}/lib*.so
%{_prefix}/%{_lib}/lib*.a
%{_prefix}/include/*

%changelog
* Tue Mar 05 2013 Julien Dutheil 2.1.0-1
- 'omics' tools now in bpp-seq-omics
- Extended BppO support
- StateProperties renamed to AlphabetIndex
* Thu Feb 09 2012 Julien Dutheil 2.0.3-1
- Improved maf tools + new iterators
- Added support for GFF
- Added support for FastQ
- Several bugs fixed.
* Thu Jun 09 2011 Julien Dutheil 2.0.2-1
- Improved maf tools, several bugs fixed.
* Mon Feb 28 2011 Julien Dutheil 2.0.1-1
* Mon Feb 07 2011 Julien Dutheil 2.0.0-1
* Thu Mar 25 2010 Julien Dutheil 1.7.0-1
* Wed Jun 10 2009 Julien Dutheil 1.6.0-1
* Thu Dec 11 2008 Julien Dutheil 1.5.0-1
* Mon Jul 21 2008 Julien Dutheil 1.4.1-1
* Fri Jan 18 2008 Julien Dutheil 1.4.0-1
* Fri Jul 06 2007 Julien Dutheil 1.3.0-1
* Fri Jan 19 2007 Julien Dutheil 1.2.0-1
* Mon Aug 28 2006 Julien Dutheil 1.1.1-1
* Tue Apr 18 2006 Julien Dutheil 1.1.0-1
* Fri Nov 16 2005 Julien Dutheil 1.0.0-1
- First draft of the spec file
bpp-seq-2.1.0/CMakeLists.txt000644 000000 000000 00000014417 12147656566 015630 0ustar00rootroot000000 000000 
# CMake script for Bio++ Sequence library
# Author: Sylvain Gaillard and Julien Dutheil
# Created: 20/08/2009

# Global parameters
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
PROJECT(bpp-seq CXX)

# Default to an optimised build with debug symbols when none was requested.
IF(NOT CMAKE_BUILD_TYPE)
  SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
      "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
      FORCE)
ENDIF(NOT CMAKE_BUILD_TYPE)

# Append the project warnings to whatever flags were already supplied (for
# example through the CXXFLAGS environment variable or -DCMAKE_CXX_FLAGS)
# instead of replacing them.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weffc++ -Wshadow -Wconversion")

IF(NOT NO_VIRTUAL_COV)
  SET(NO_VIRTUAL_COV FALSE CACHE BOOL
      "Disable covariant return type with virtual inheritance, for compilers that do not support it."
      FORCE)
ENDIF(NOT NO_VIRTUAL_COV)

IF(NO_VIRTUAL_COV)
  MESSAGE("-- Covariant return with virtual inheritance disabled.")
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_VIRTUAL_COV=1")
ENDIF(NO_VIRTUAL_COV)

IF(NOT NO_DEP_CHECK)
  SET(NO_DEP_CHECK FALSE CACHE BOOL
      "Disable dependencies check for building distribution only."
      FORCE)
ENDIF(NOT NO_DEP_CHECK)

IF(NO_DEP_CHECK)
  MESSAGE("-- Dependencies checking disabled. Only distribution can be built.")
ELSE(NO_DEP_CHECK)
  # Libtool-like version number
  # CURRENT:REVISION:AGE => file.so.(C-A).A.R
  # current:  The most recent interface number that this library implements.
  # revision: The implementation number of the current interface.
  # age:      The difference between the newest and oldest interfaces that this
  #           library implements.
  # In other words, the library implements all the interface numbers in the
  # range from number current - age to current.
  SET(BPPSEQ_VERSION_CURRENT "10")
  SET(BPPSEQ_VERSION_REVISION "3")
  SET(BPPSEQ_VERSION_AGE "1")

  # Effective version number computation
  math(EXPR BPPSEQ_VERSION_MAJOR "${BPPSEQ_VERSION_CURRENT} - ${BPPSEQ_VERSION_AGE}")
  SET(BPPSEQ_VERSION_MINOR ${BPPSEQ_VERSION_AGE})
  SET(BPPSEQ_VERSION_PATCH ${BPPSEQ_VERSION_REVISION})
  SET(BPPSEQ_VERSION "${BPPSEQ_VERSION_MAJOR}.${BPPSEQ_VERSION_MINOR}.${BPPSEQ_VERSION_PATCH}")

  # Set the CMAKE_PREFIX_PATH for the find_library function when using non
  # standard install location
  IF(CMAKE_INSTALL_PREFIX)
    SET(CMAKE_PREFIX_PATH "${CMAKE_INSTALL_PREFIX}" ${CMAKE_PREFIX_PATH})
  ENDIF(CMAKE_INSTALL_PREFIX)

  # IMPROVED_FIND_LIBRARY(OUTPUT_LIBS lib_name include_to_find)
  # Locates library <lib_name> and the directory containing <include_to_find>,
  # adds that directory to the include path and appends the library to the
  # list variable named by OUTPUT_LIBS.  Configuration stops with a fatal
  # error when either the library or its header cannot be found.
  MACRO(IMPROVED_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find)
    #start:
    FIND_PATH(${lib_name}_INCLUDE_DIR ${include_to_find})
    SET(${lib_name}_NAMES ${lib_name} ${lib_name}lib ${lib_name}dll)
    FIND_LIBRARY(${lib_name}_LIBRARY NAMES ${${lib_name}_NAMES} PATH_SUFFIXES lib${LIB_SUFFIX})
    # Both results are required: a missing header would otherwise put a
    # "-NOTFOUND" path on the include line.
    IF(${lib_name}_LIBRARY AND ${lib_name}_INCLUDE_DIR)
      MESSAGE("-- Library ${lib_name} found here:")
      MESSAGE(" includes : ${${lib_name}_INCLUDE_DIR}")
      MESSAGE(" libraries: ${${lib_name}_LIBRARY}")
    ELSE(${lib_name}_LIBRARY AND ${lib_name}_INCLUDE_DIR)
      MESSAGE(FATAL_ERROR "${lib_name} required but not found.")
    ENDIF(${lib_name}_LIBRARY AND ${lib_name}_INCLUDE_DIR)
    #add the dependency:
    INCLUDE_DIRECTORIES(${${lib_name}_INCLUDE_DIR})
    SET(${OUTPUT_LIBS} ${${OUTPUT_LIBS}} ${${lib_name}_LIBRARY})
  ENDMACRO(IMPROVED_FIND_LIBRARY)

  #Find the Bio++ libraries:
  IMPROVED_FIND_LIBRARY(LIBS bpp-core Bpp/Clonable.h)

  # Subdirectories
  ADD_SUBDIRECTORY(src)

  # Doxygen
  # Each target copies the Doxyfile into the build tree, appends overrides to
  # the copy and runs doxygen on it from the source directory.
  FIND_PACKAGE(Doxygen)
  IF (DOXYGEN_FOUND)
    ADD_CUSTOM_TARGET (apidoc
      cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-build
      COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-build
      COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-build
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
    ADD_CUSTOM_TARGET (apidoc-stable
      cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND echo "HTML_HEADER=header.html" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-stable
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
  ENDIF (DOXYGEN_FOUND)

ENDIF(NO_DEP_CHECK)

# Packager
SET(CPACK_PACKAGE_NAME "libbpp-seq")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
SET(CPACK_PACKAGE_VERSION "2.1.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "2")
SET(CPACK_PACKAGE_VERSION_MINOR "1")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Sequence library")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
SET(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS.txt")
SET(CPACK_RESOURCE_FILE_INSTALL "${CMAKE_SOURCE_DIR}/INSTALL.txt")
SET(CPACK_SOURCE_GENERATOR "TGZ")
# Patterns excluded from the source package.
SET(CPACK_SOURCE_IGNORE_FILES
  "CMakeFiles"
  "Makefile"
  "_CPack_Packages"
  "CMakeCache.txt"
  ".*\\\\.cmake"
  ".*\\\\.git"
  ".*\\\\.gz"
  ".*\\\\.deb"
  ".*\\\\.rpm"
  ".*\\\\.dmg"
  ".*\\\\.sh"
  ".*\\\\..*\\\\.swp"
  "src/\\\\..*"
  "src/libbpp*"
  "debian/tmp"
  "debian/libbpp.*/"
  "debian/libbpp.*\\\\.so.*"
  "debian/libbpp.*\\\\.a"
  "debian/libbpp.*\\\\.substvars"
  "debian/libbpp.*\\\\.debhelper"
  "debian/debhelper\\\\.log"
  "html"
  "Seq.tag"
  "Testing"
  "build-stamp"
  "install_manifest.txt"
  "DartConfiguration.tcl"
  ${CPACK_SOURCE_IGNORE_FILES}
)
# NOTE(review): MACOS is not set anywhere in this file, so this branch looks
# unreachable as written; CMake's own platform variable is APPLE -- confirm
# the intent before changing it.
IF (MACOS)
  SET(CPACK_GENERATOR "Bundle")
ENDIF()

SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
SET(CPACK_DEBSOURCE_PACKAGE_FILE_NAME "lib${CMAKE_PROJECT_NAME}_${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.orig")
INCLUDE(CPack)

#This adds the 'dist' target
ADD_CUSTOM_TARGET(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
# 'clean' is not (yet) a first class target. However, we need to clean the directories before building the sources:
IF("${CMAKE_GENERATOR}" MATCHES "Make")
  # The original passed ${CMAKE_CURRENT_DIR}, which is not defined in this
  # file and is not a CMake built-in; name the build directory explicitly.
  ADD_CUSTOM_TARGET(make_clean
    COMMAND ${CMAKE_MAKE_PROGRAM} clean
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  )
  ADD_DEPENDENCIES(dist make_clean)
ENDIF()

IF(NOT NO_DEP_CHECK)
  IF (UNIX)

    #This creates deb packages:
    ADD_CUSTOM_TARGET(origdist COMMAND cp ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz ../${CPACK_DEBSOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(origdist dist)
    ADD_CUSTOM_TARGET(deb dpkg-buildpackage -uc -us -i${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(deb origdist)

    #This creates rpm packages:
    ADD_CUSTOM_TARGET(rpm rpmbuild -ta ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(rpm dist)

  ENDIF()

  SET(CTEST_UPDATE_TYPE git)
  SET(UPDATE_COMMAND "git")
  SET(UPDATE_OPTIONS "")

  ENABLE_TESTING()
  INCLUDE(CTest)
  IF (BUILD_TESTING)
    ADD_SUBDIRECTORY(test)
  ENDIF(BUILD_TESTING)

ENDIF(NOT NO_DEP_CHECK)
bpp-seq-2.1.0/COPYING.txt000644 000000 000000 00000043122 12147656566 014734 0ustar00rootroot000000 000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
<one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. bpp-seq-2.1.0/test/test_alignment_scores.cpp000644 000000 000000 00000007655 12147656566 021154 0ustar00rootroot000000 000000 // // File: test_alignment_scores.cpp // Created by: Julien Dutheil // Created on: Wed Dec 14 16:35 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include #include #include #include #include using namespace bpp; using namespace std; int main() { RNA* alpha = new RNA(); SiteContainer* sites = new VectorSiteContainer(alpha); BasicSequence seq1("seq1", "----AUGCCG---GCGU----UUU----G--G-CCGACGUGUUUU--", alpha); BasicSequence seq2("seq2", "---GAAGGCG---G-GU----UUU----GC-GACCGACG--UUUU--", alpha); BasicSequence seq3("seq3", "---GAA-CCG---G-GU----UUU----VC-GACCGGAG--UUUU--", alpha); sites->addSequence(seq1, false); sites->addSequence(seq2, false); sites->addSequence(seq3, false); //Create alignment indexes: RowMatrix index1; SiteContainerTools::getSequencePositions(*sites, index1); vector scores = SiteContainerTools::getColumnScores(index1, index1); VectorTools::print(scores); for (size_t i = 0; i < sites->getNumberOfSites(); ++i) { if (SiteTools::isGapOnly(sites->getSite(i))) { if (scores[i] != 0) return 1; } else { if (scores[i] != 1) return 1; } } SiteContainer* sites2 = new VectorSiteContainer(alpha); BasicSequence seq21("seq1", "----AUGCCGGCGU-UUUG--G-CCGACGUGUUUU", alpha); BasicSequence seq22("seq2", "---GAAGGCGG-GUU-UUGC-GACCGAC--GUUUU", alpha); BasicSequence seq23("seq3", "---GAA-CCGG-GUUU-UVC-GACCGGA--GUUUU", alpha); sites2->addSequence(seq21, false); sites2->addSequence(seq22, false); sites2->addSequence(seq23, false); RowMatrix index2; SiteContainerTools::getSequencePositions(*sites2, index2); vector scores12 = SiteContainerTools::getColumnScores(index1, index2); VectorTools::print(scores12); //Just a simple test, please check output by eye for better evaluation! 
if (scores12.size() != index1.getNumberOfColumns()) return 1; vector scores21 = SiteContainerTools::getColumnScores(index2, index1); VectorTools::print(scores21); if (scores21.size() != index2.getNumberOfColumns()) return 1; vector sp12 = SiteContainerTools::getSumOfPairsScores(index1, index2); VectorTools::print(sp12); vector sp21 = SiteContainerTools::getSumOfPairsScores(index2, index1); VectorTools::print(sp21); return 0; } bpp-seq-2.1.0/test/example.gff000644 000000 000000 00000003331 12147656566 016157 0ustar00rootroot000000 000000 ##gff-version 3 ##sequence-region ctg123 1 1497228 ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN ctg123 . TF_binding_site 1000 1012 . + . ID=tfbs00001;Parent=gene00001 ctg123 . mRNA 1050 9000 . + . ID=mRNA00001;Parent=gene00001;Name=EDEN.1 ctg123 . mRNA 1050 9000 . + . ID=mRNA00002;Parent=gene00001;Name=EDEN.2 ctg123 . mRNA 1300 9000 . + . ID=mRNA00003;Parent=gene00001;Name=EDEN.3 ctg123 . exon 1300 1500 . + . ID=exon00001;Parent=mRNA00003 ctg123 . exon 1050 1500 . + . ID=exon00002;Parent=mRNA00001,mRNA00002 ctg123 . exon 3000 3902 . + . ID=exon00003;Parent=mRNA00001,mRNA00003 ctg123 . exon 5000 5500 . + . ID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003 ctg123 . exon 7000 9000 . + . ID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003 ctg123 . CDS 1201 1500 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1 ctg123 . CDS 3000 3902 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1 ctg123 . CDS 5000 5500 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1 ctg123 . CDS 7000 7600 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1 ctg123 . CDS 1201 1500 . + 0 ID=cds00002;Parent=mRNA00002;Name=edenprotein.2 ctg123 . CDS 5000 5500 . + 0 ID=cds00002;Parent=mRNA00002;Name=edenprotein.2 ctg123 . CDS 7000 7600 . + 0 ID=cds00002;Parent=mRNA00002;Name=edenprotein.2 ctg123 . CDS 3301 3902 . + 0 ID=cds00003;Parent=mRNA00003;Name=edenprotein.3 ctg123 . CDS 5000 5500 . + 1 ID=cds00003;Parent=mRNA00003;Name=edenprotein.3 ctg123 . 
CDS 7000 7600 . + 1 ID=cds00003;Parent=mRNA00003;Name=edenprotein.3 ctg123 . CDS 3391 3902 . + 0 ID=cds00004;Parent=mRNA00003;Name=edenprotein.4 ctg123 . CDS 5000 5500 . + 1 ID=cds00004;Parent=mRNA00003;Name=edenprotein.4 ctg123 . CDS 7000 7600 . + 1 ID=cds00004;Parent=mRNA00003;Name=edenprotein.4 bpp-seq-2.1.0/test/example.fastq000644 000000 000000 00000000400 12147656566 016525 0ustar00rootroot000000 000000 @EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC + ;;3;;;;;;;;;;;;7;;;;;;;88 @EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA + ;;;;;;;;;;;7;;;;;-;;;3;83 @EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG +EAS54_6_R1_2_1_443_348 ;;;;;;;;;;;9;7;;.7;393333 bpp-seq-2.1.0/test/test_containers.cpp000644 000000 000000 00000005221 12147656566 017750 0ustar00rootroot000000 000000 // // File: test_containers.cpp // Created by: Julien Dutheil // Created on: Mon Nov 01 10:15 2010 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include #include #include #include using namespace bpp; using namespace std; int main() { //ProteicAlphabet* alpha = new ProteicAlphabet; RNA* alpha = new RNA(); SiteContainer* sites = new VectorSiteContainer(alpha); BasicSequence seq1("seq1", "----AUGCCG---GCGU----UUU----G--G-CCGACGUGUUUU--", alpha); BasicSequence seq2("seq2", "---GAAGGCG---G-GU----UUU----GC-GACCGACG--UUUU--", alpha); sites->addSequence(seq1, false); sites->addSequence(seq2, false); cout << sites->getNumberOfSites() << endl; cout << sites->toString("seq1") << endl; cout << sites->toString("seq2") << endl; SiteContainerTools::removeGapOnlySites(*sites); cout << endl; cout << sites->getNumberOfSites() << endl; cout << sites->toString("seq1") << endl; cout << sites->toString("seq2") << endl; return (sites->getNumberOfSites() == 30 ? 
0 : 1); } bpp-seq-2.1.0/test/CMakeLists.txt000644 000000 000000 00000004767 12147656566 016616 0ustar00rootroot000000 000000 # CMake script for bpp-seq unit tests # Author: Julien Dutheil # Created: 30/10/2010 MACRO(TEST_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find) #start: FIND_PATH(${lib_name}_INCLUDE_DIR ${include_to_find}) SET(${lib_name}_NAMES ${lib_name} ${lib_name}.lib ${lib_name}.dll) FIND_LIBRARY(${lib_name}_LIBRARY NAMES ${${lib_name}_NAMES}) IF(${lib_name}_LIBRARY) MESSAGE("-- Library ${lib_name} found here:") MESSAGE(" includes: ${${lib_name}_INCLUDE_DIR}") MESSAGE(" dynamic libraries: ${${lib_name}_LIBRARY}") MESSAGE(WARNING "Library ${lib_name} is already installed in the system tree. Test will be built against it. This may lead to unexpected results. You may want to do 'make install' before 'make test', or remove the installed version.") ELSE() SET(${lib_name}_LIBRARY "-L../src -lbpp-seq") SET(${lib_name}_INCLUDE_DIR "../src/") ENDIF() INCLUDE_DIRECTORIES(${${lib_name}_INCLUDE_DIR}) SET(${OUTPUT_LIBS} ${${OUTPUT_LIBS}} ${${lib_name}_LIBRARY}) ENDMACRO(TEST_FIND_LIBRARY) #Find the bpp-seq library library: TEST_FIND_LIBRARY(LIBS bpp-seq Bpp/Seq/Alphabet/Alphabet.h) ADD_EXECUTABLE(test_alphabets test_alphabets.cpp) TARGET_LINK_LIBRARIES(test_alphabets ${LIBS}) ADD_TEST(test_alphabets "test_alphabets") ADD_EXECUTABLE(test_sequences test_sequences.cpp) TARGET_LINK_LIBRARIES(test_sequences ${LIBS}) ADD_TEST(test_sequences "test_sequences") ADD_EXECUTABLE(test_io test_io.cpp) TARGET_LINK_LIBRARIES(test_io ${LIBS}) ADD_TEST(test_io "test_io") ADD_EXECUTABLE(test_containers test_containers.cpp) TARGET_LINK_LIBRARIES(test_containers ${LIBS}) ADD_TEST(test_containers "test_containers") ADD_EXECUTABLE(test_alignment_scores test_alignment_scores.cpp) TARGET_LINK_LIBRARIES(test_alignment_scores ${LIBS}) ADD_TEST(test_alignment_scores "test_alignment_scores") ADD_EXECUTABLE(test_walker test_walker.cpp) TARGET_LINK_LIBRARIES(test_walker ${LIBS}) 
ADD_TEST(test_walker "test_walker") ADD_EXECUTABLE(test_bowker test_bowker.cpp) TARGET_LINK_LIBRARIES(test_bowker ${LIBS}) ADD_TEST(test_bowker "test_bowker") IF(UNIX) SET_PROPERTY(TEST test_alphabets test_sequences test_io test_containers test_alignment_scores test_walker test_bowker PROPERTY ENVIRONMENT "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:../src") ENDIF() IF(APPLE) SET_PROPERTY(TEST test_alphabets test_sequences test_io test_containers test_alignment_scores test_walker test_bowker PROPERTY ENVIRONMENT "DYLD_LIBRARY_PATH=$ENV{DYLD_LIBRARY_PATH}:../src") ENDIF() IF(WIN32) SET(ENV{PATH} "$ENV{PATH};..\\src") ENDIF() bpp-seq-2.1.0/test/test_bowker.cpp000644 000000 000000 00000007205 12147656566 017100 0ustar00rootroot000000 000000 // // File: test_bowker.cpp // Created by: Julien Dutheil // Created on: Sat Apr 16 13:19 2009 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include #include #include #include using namespace bpp; using namespace std; BasicSequence* getRandomSequence(const Alphabet* alphabet, unsigned int size) { string seq = ""; for (unsigned int i = 0; i < size; ++i) seq += alphabet->intToChar(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(alphabet->getSize())); return new BasicSequence("random seq", seq, alphabet); } int test(const Alphabet* alphabet, unsigned int size, unsigned int rep) { double n01 = 0; double n05 = 0; double n10 = 0; double n50 = 0; //ofstream out("pvalues.txt", ios::out); for (unsigned int i = 0; i < rep; ++i) { ApplicationTools::displayGauge(i, rep-1); auto_ptr seq1(getRandomSequence(alphabet, size)); auto_ptr seq2(getRandomSequence(alphabet, size)); auto_ptr test(SequenceTools::bowkerTest(*seq1, *seq2)); double p = test->getPValue(); if (p <= 0.01) n01++; if (p <= 0.05) n05++; if (p <= 0.10) n10++; if (p <= 0.50) n50++; //out << p << endl; } //out.close(); cout << n01 / rep << "\t" << n05 / rep << "\t" << n10 / rep << "\t" << n50 / rep << endl; if (abs(n01*100 / rep - 1 ) > 1) return 1; if (abs(n05*100 / rep - 5 ) > 1) return 1; if (abs(n10*100 / rep - 10) > 1) return 1; if (abs(n50*100 / rep - 50) > 1) 
return 1; return 0; } int main() { //ProteicAlphabet* alpha = new ProteicAlphabet; RNA* alpha = new RNA(); BasicSequence seq1("seq1", "----AUGCCG---GCGU----UUU----G--G-CCGACGUGUUUU--", alpha); BasicSequence seq2("seq2", "---GAAGGCG---G-GU----UUU----GC-GACCGACG--UUUU--", alpha); auto_ptr btest(SequenceTools::bowkerTest(seq1, seq2)); cout << btest->getStatistic() << "\t" << btest->getPValue() << endl; delete alpha; test(&AlphabetTools::DNA_ALPHABET, 1000, 5000); test(&AlphabetTools::PROTEIN_ALPHABET, 20000, 5000); return 0; } bpp-seq-2.1.0/test/example.ph3000644 000000 000000 00000503355 12147656566 016122 0ustar00rootroot000000 000000 100 1209 I t73 LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPYSGNG TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S ESAMWIFDLW MCKLNHGMQR --------GD IGRVKRDVQK FPKLKEGAPN CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG FFDK-D-Q-- ----SRPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG L----WHPFY KASDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRG------N NVMSESLGDD NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- LCNEVLNGLQ WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV VFL-ISDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE DIFQILVCV t66 
LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPCSGNG TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S ESAMWIFDLW MCKLNHGMQR --------GD IGRVKRDVQK FPKLKEGAPN CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG FFDK-D-Q-- ----SRPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG L----WHPFY KASDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA DPTVIWFYTQ PIVYV-ADVF GCTKRKNPQE MRG------N NVMSESLGDD NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- LCNEVLNGLQ WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV VFL-ISDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE DIFQILVCV t32 LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPYSGNG TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S ESAMWIFDLW MCKLNHGMQR --------GD IGRVKQDVQK FPKLKEGAPN CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG FFDK-D-Q-- ----ARPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG L----WHPFY KASDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRG------N NVMSESLGDD 
NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- LCNDVLNLLV WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV VFL-LSDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE DIFQILVCV t75 LIVSEIRVMV RDE--VHKAM DEE-C----- -------LIA RRVKPYSGNG NQIRNDIEDA N--GQEFVGI ---RELGKH- YD-CMDGYHT VNNGAGEN-S ESAMWIFDLW MCKLNHGMQR --------GD IGRVQQDVHK FPKLKEGAPN CSSFVKPVFM GCDMFHRQVQ NRGNHGLVLD DTWNEESGTF PYPQVHSRNE KRCINKLHQG DHNESCH--- HNPRI----V R---RIGH-H LVIIVLVNRD MGRLNYIEPD ALVPSPLRFG GDEWVPKPI- ---------- -------EDG FFDK-D-Q-- ----ARPDVA NA-IVPDR-- --DQWVGAHK EWGSSLCKVG LAVHDLR-NV SDLYRCVWME CVLGANHYQQ VSRMKMVTPA YTHMTEYANG L----WHPFY KNPDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ VHG-ALYLIG MKTVAKPVLG DWEGCKCRSD LIKLYVSQLD H----KTLSK -NMGLY--MR NRTWLATSPL Q------IGC IFMLVGRKKN SEE-NYNKAA DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRK------N NVISESLGDD NLQD--MPGM PVTVCTSSVM VRKDMHQ-SV DKRGYTWHAK E-DHLCPDSF CKGEK---EE EPGGVRR--- ---------- ----RIVENI VKLLYIKDV- LCNQVLNLLQ WQLCWSV-GD WESLVPQACW GAKKDLAVCA WKMELVPGL- --NRNNENLA KIIYFGPDGH -------DEG NMQAKIVTLH W-EMDVSHRG -SSVDDNVV- VVMLAFQASF CDPWGHYIQG LGD-QYKLAR PNT---ARQL SILV---VMH FSGGFRDKPI -ERSQLHC-T YSEN----KK DQKVVIRAMK -RNKGCDWHA GKD-MAGGPK -IIYVGMLFY -------AES NWAKQKAHFV VFL-ISDATR LLIHHVGCMI EYTFMEFACH PFFAELFMEH MVIRYQYYNN VDIKIFDTCF RGAVYAEDNS WETLCGY-EM LSGYFGAEIN HNPRLPEQVC -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIPIRISGS E-SENVVLQY CLYYEENKSI R--------- --QNNTGSAV LPK------- ---------- ----DRLVNN FVD-ELYNSV ---YTAAPKK K--RHL-GIG DNGGMELVRE 
DIFQILVCV t79 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ NQVRNAVEDA A--RPDFVGT ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSIPMPYFL GVDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGL-N LVIISYANVD MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------GDG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD SLQQ--MPAV PVDVCVMSVM VNKSMPQ-SH DKRGYTWQAK Q-DHLFPVNV VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNMVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFNINS IDPWRHYILG LND-QIQLTR PNT---ARRL SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYINN HDIKIKDTVF RLAVYAWDNF WETLCKC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS E-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----ARLVNK VVD-ELYNNV ---YHAIPKK E--MNQ-GVG DSGGVEMVQE DVFQILVCI t27 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTMSGSQ NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD MGRLNHCEDQ AIVYSPLRYG GAGFVSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL VHH-ALYLLG IHTVMKPHLG NVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N 
NVISESLGDD SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNC VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNIVLSLLV WQLC--H-GD WEPLLPQACE GARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ARRL SILH---VMH YSGAFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -VNKDHDWIT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYVNN HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS E-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----ARLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG DSGGVEMVQE DVFQILVCI t93 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTMSGSQ NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD MGRLNHCEDQ AIVYSPLRYG GAGFVSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL VHH-ALYLLG IHTVMKPHLG NVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNC VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNIVLSLLV WQLC--H-GD WEPLLPQACE GARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ARRL SILH---VMH YSGAFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -VNKDHDWIT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYVNN HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS E-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----ARLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG 
DSGGVEMVQE DVFQILVCI t59 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDVW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CILGANVYHQ ISRMKMHGPT YSHVMEYAHG M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNQAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNIDLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFAELFMEH VYIRYQYVNN HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----HRLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG DSGGVEMVQE DVFQILVCI t7 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKKFSGSQ GQVRNAIEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSMPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNRAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE 
MRM------N NVISESLGDD SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNIVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DRTFMEFACN PFFAELFMEH VYIRYQYVNN HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----HRLVNK VVD-ELYNNP ---YHAIPKK D--MNQ-GVG DSGGIEMVQE DVFQILVCI t78 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ GQVRNAIEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNRAG DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- DCNIVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV IFL-ICDAVR ILILHHGCMT DRTFMEFACN PFFAELFMEH VYIRYQYVNN HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----HRLVNK VVD-ELYNNP ---YHAIPKK 
D--MNQ-GVG DSGGIEMVQE DVFQILVCI t12 LIVSEVRVMV RDG--INIAI DEL-C----- -------LIA NRVKAFSGHQ NQVRNAMEDA Q--RPDFVGI ---RELGKQ- YQ-CMDGHGA VDTGAGRN-S ESAVWIFDIW MCKLAHGMQW --------GD IGRVSVDHPK FLKLEEGAPR CSSCPMPYFL GCEMFHRQVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV KRCLNVLVQG DHDESRH--- HNARI----V A---RIGM-N MVIIRYLNVE MGRLNHCEDD AMVYSPLRIG GAGTHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPLIA NAHIVPEN-- --DQWTGAVA QWCSSVIKCG LANHDLE-NI SRLLRCVVME CVLGANVYHQ ASRMKMHGPY YAHVTEYAHG L----WKPFY QTSDHKNEAQ GVGES-THQL PCVESQTCQY EVKHAKVCKL VHH-ALYLLA LHTMMKPRLG DIDGCRCRSE LNKLALSQLD D----KTLSK -NCFQY--LG NMTWLATSPL H------VGC MIIF------ ---------- ---------- ---------- ---------- --V------N NVLSQSLGDD SLQQ--MHAM PVTVVVVSHM VKKSMPQ-SH DKRGYTWQAK DFDQLIPVSF VKGEQ---EE EPEGPNN--- ---------- ----RVVHNI VKLLYTKDV- DCNTVLSLLI WQLC--H-GD WEQLVPQACA GARSDLAVCA WKRELVPGL- --NHNNENLA KVIYFGPDGH -------DEG PMGRKIVDLH W-EMEASHRD -SSVRDNML- VIMLPFLVNN LDPWRHYILG LND-QIQLTR PNT---ARRL SILM---VMH YSGEFHDKSP -ERSQLHFDS YSEH----KN DHKCVILASK -HNKGLDWAT GKDDMKGGAK -IMMSGALFY -------AEQ NWIRARNHFV IFL-ICDAVR ILIVLHGCMT AWTFMEFACN PFFAELFMEH VYIRYQYVNN HDIKIKDTEF RLAVYAWENS WETLCNC-EM LSGHIGAKIN HNPRLFEQVC -PPCELTVSG LKGKH-VGWP YLTDITRTPL IRIQIREPGS E-CENSVVMY VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- ----HRLVNK VVD-ELYNNV ---YHAIPKK D--INV-GVG DSGGMEVVQE DVFQILVCI t81 LIVSEVRHMV RDG--ANVAI DEL-C----- -------LIA CRVKAFSGHG HQVRNAVEDA A--RPDFIGI ---RELGKP- Y--CMDGHGA VNTGAGHN-S ESAVWIFDIW MYKLSHGMQW --------GD IGRVGVDHPK FLKLEEGAPN CSSLPMPYFL GVDMFHRQVT IKGAKGLVLP DIWNESPMAF PYAQVHSKSH KRCLNVLVQG DHEESEH--- HNARI----M S---KIGT-H LVIINIINVE MQRLNHCEDK AIVYSPDRIG GAGFHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPIIA NAHIVPVN-- --DQWTGPMA QWCSSVIKCG LANHELE-DV SRLLRCVGME CNLGANHYHQ VSRMKMHGPI YSHMTEYAVG L----WKPFY QASEHKNEAQ GLGER-PHQL PCVESQTCQY EIKVAKVCKL VHV-ALYLVG LKTVVKPKLG DWDGIRCRSE LNKLYLSQLD D----KTLSK -NCFIY--LG NMTWLATSPL Q------IGW LVLFDGRKRQ SAE-DYNRAA DPTTISFCRK PIVQH-ADVF 
GCDRDKDPQE MRT------N NVISESLGDD FLQQ--MPAV PVSVCVYSHM VKKFVPQ-SH EKRGYTWKAK E-DHLVPISY CKGEH---EE EPEGAQY--- ---------- ----RVVQNI VKLLYTKDV- DCNVVLSLLV WQLC--H-GD WEPLIPQACQ TAKKDLAVCA WKRELVPGL- --NCNNEYLA KIIYFGPDGH -------DEG PMGRKIVDLH W-EMEASHRD -SSVRDNYI- VIMLPFKINS RDPWRHYILG IND-QVCLNR PET---ARRL SILA---VMH YSGEFHDKSP -ERSHLHF-R YSEM----KN DVKCVI---K -SNKGLDWTA GKDGMKGEAK -AMMNGALFY -------AEQ NWFRKKNHFV IFV-FCDAVR ILIMHSGVMI GYTFMEFACN PFFAELFMEH VMVRYQYESN HDIKIKDTCF RLAVYAWENS WETLCNV-EM LSGHFGAKIN HVPRLPEQVC -PPCDLTVSG LKGKQ-NGLP YLTDITRTPL IRIQIRVPGS E-SENVVVMY MVYYKERKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVVK VAD-ELYNNI ---YHAIPKK D--GVLLGIG DSGGMEMVQE DLFQILVCA t21 LIVSEVRHMV RDG--ANIAI DEL-C----- -------LIA CRVKAFSGHG NQVRNAVEDA P--RPDFIGV ---RELGKP- Y--CMDGHGA VNTGAGVN-S ESAVWIFDIW MYKLSHGMQW --------GD IGRVDVDHPK FLKLEEGAPN CSSLPMPYFL GVDMFHKQVT LKGAKGLVLP DIWNESPMAF PYAQVHSKSH KRCLNVLVQG DHEESEH--- HNARI----M S---RIGT-H LVIVNVINVE MQRLNHCEDK AIVYSPDRIG GAGFHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPIIA NAHIVPIY-- --DQWTGPMA QWCSSVIKCG LANHELK-VV SRLLRCVGME CNLGANHYHQ VSRMKMHGPI YSHMTEYAHG L----WKPFY QASEHKNEAQ GVGER-PHQL PCVESQKCQY EVKVAKVCKL VHV-ALYLVG LKTVVKPKLG DWDGVRCRSD LNKLYLSQLD D----KTLSK -NCFRY--LG NMTWLATSPL Q------NGC VVIFAGRKRQ SAE-DYNRAA DPTTIAFCRK PIVQH-ADVF GCDKDKDPQE MRM------N NVISESLGDD FLQQ--MPAV PVSVCVHSHM VKKFVPQ-SH EKRGVTWKAK E-DHLVPISL CKGEH---EE EPEGAQY--- ---------- ----RCVQNI VKLLYTKDV- DCNVVLSLLV WQLC--H-GD WEPLVPQACQ TAKKDLAVCA WKRELVPGL- --NCNNEYLA KIIYFGPDGH -------DEG PMGRKIIQLH W-EMEASHRD -SSVRDNII- VIMLPFKINS RDPWRHYILG LND-QVCLTR PET---ARRL SILV---VMH YSGEFHDKSP -ERSHLHY-R YSEV----KN DVKCVILASK -SNKGLDWTA GKDGMKGENK -AMMNGTLFY -------AEQ NWFRQKNHFV IFV-ICDAVR ILIVHSGVMT GFTFMEFACN PFFAELFMEH VYIRYQYQAN HDIKIKDTCF RLAVYAWENS WETLCNV-EM LSGHFGAKIN HVPRLPEQVC -PPCDLTVSG LKGKQ-NGWP YLTDITRTPL IRIQIRVPGS E-SENITVMY MVYYKERKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVVK VAD-ELYNNI 
---YHAIPKK D--GVLLGIG DSGGMEMAQE DLFQILVCV t80 LIVSEVRHMV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG NQVRNAMEMA A--RQNFVGM ---RELEKV- YE-CMDGQGA VNTEAGNN-S ESAVWIFDIW MCKLTHGMQD FGD-----GD IGRVVVDHPK FEKLEEGAPN CSSHPVPYFL GIDMFHKQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DVEES-H--- HSARC----V R---RIGF-H LVIIVFWNVE MGRLNHCEDE AIIYSPLRIG GAGFHSKPV- ---------- -------DDG FFDK-D-Q-- ----ARPVIA SAHIVPEH-- --DQWSGAMA QWCSSVIKCG LANHDLE-NV SRLLRCVMID CNQGANHYIQ ISRMKVHGPT YSHMTEYAHG L----WKPFY QASDHKNDAQ GVGER-PHQL PCVESQTCQY EMKHAKVCKL VHG-ALYLIA LKTVVKPKLG VWQGCRCRSQ LSKLVLSQLD E----KTLSK -NCSIY--LG NMTWLATSPL Q------IGC VILFGGRKRQ SHGLNYNRAA DPTTICFYKK PIVVQGADVF GCARGKDPQE MKA------N NVISGSLGDD RLQQ--MPAM PVTICVSSYM VKKSVIQ-SH QKRGYTWRAK E-DHLIPVSF CKGEL---DE EPDAAQQ--- ---------- ----RIVQNI VKLLYTKDV- SCNVVLSMLI WQIC--H-GD WEPQIPKACS NARKDLAVCA WKRELVPGL- --NHNNENLA KIMYFGHDGH -------DEG PMGRKIVTLH V-EMEVSHRE -SSVKDNMIR VIMLPFSNSS VDPWRHVILG LND-QIKLTR PNT---AHRL SILV---VTH YSGGFHDKSA -ERSQHHF-I YNEV----KD DHKCVILASK -CNKGLDWAA GKD-MKGGAK -VMMNGALFY -------AER NWIRQKHHFV IFV-VCDAVR ILIVHYGCMI GYTFMDFACN PFFAELFMDH VFIRYVYMNN HDIKIMDTCF RVAVYTWENS WETCFNR-EM LSGTFGAKIS HNPRLPEQVI -PPCELTVSG LKGKH-NGWP YLTDITRTPL IKIHIRVPGS Q-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVDQ VVD-ELYSKF ---YHAIPKK P--GNM-GVG DSGGMEMVQE DVFQILVCL t14 LIVSEVRHMV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG NQVRNAMEMA A--RQNFVGM ---RELEKV- YQ-CMDGQGA VNTEAGNN-S ESAVWIFDIW MCKLTHGMQD FGD-----GD IGRVVCDHPK FEKLEEGAPN CSSHPMPYFL GVDMFHEQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DVEES-H--- HSARC----V R---RIGF-H LVIIVYWNVE MGRLNHCEDE AIIYSPLRIG GAGFHSKPV- ---------- -------DDG FFDK-D-Q-- ----ARPVIA SAHIVPEH-- --DQWSGAMA QWCSSVIKCG LANHDLE-NV SRLLRCVMID CNQGANHYIQ ISRMKVHGPT YSHMTEYAHG L----WKPFY QASDHKSDAQ GVGER-PHQL PCVESQTCQY EMKHAKVCKL VHG-ALYLIA LKTVVKPKLG VWQGCRCRSQ LSKLVLSQLD E----KTLSK -NCSIY--LG NMTWLATSPL Q------IGC VILFGGRKRQ SHGLNYNRAA DPTTICFYKK 
PIVVQGADVF GCARGKDPQE MKA------N NVISGSLGDD RLQQ--MPAM PVTICVSSYM VKKSVPQ-SH QKRGYTWRAK E-DHLIPVSF CKGEL---DE EPDAAQQ--- ---------- ----RIVQNI VKLLYTKDV- SCNVVLSMLI WQIC--H-GD WEPQIPKACS NARKDLAVCA WKRELVPGL- --NHNNENLA KIMYFGHDGH -------DEG PMGRKIVTLH V-EMEVSHRE -SSVKDNMIR VIMLPFPSSS VDPWRHVILG LND-QIKLTR PNT---AHRL SILV---VTH YSGGFHDKSA -ERSQHHF-I YNEV----KD DHKCVILASK -CNKGLDWAA GKD-MKGGAK -IMMNGALFY -------AER NWIRQKHHFV IFV-VCDAVR ILIVHYRCMI GYTFMDFACN PFFAELFMDH VFIRYQYMNN HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGAKIS HNPRLPEQVI -PPCELTVSG LKGKH-NGWP YLTEITRTPL IKIHIRVPGS Q-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVDQ VVD-ELYSKF ---YHAIPKK P--GNM-GVG DSGGMEMVQE DVFQILVCL t85 LIVSEVRHVV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG NQVRNAMEMA A--RQNFVGM ---RELDKA- YQ-CMDGQKA VNTEAGNN-S ESAVWIFDIC MCKLTHGMQD --------GD IGRVVCDHPK FEKLEEGAPN CSSIPMPYFL GIDMFHKQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DHEES-H--- VSARC----M R---RIGF-H LVIINFWNVE MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------NDG FFDK-D-Q-- ----ARPVIA NAHIVPEH-- --DQWSGAMA QWCSSVIKCG LANHDLE-NV SRLLRCVEID CNQGANHYVQ ISRMKMYGPT YSHMTEYAHG L----WKPFY QASDHKNDAQ GVGER-PHQL PCVESQTCQY EVKHANVCKL VHA-ALYLIG LKTVVKPELG VWEGCRCRSQ LSKLALSQLD E----KTLSK -NCSIY--LG NMTWLATSPL Q------IGC IILFGGRKRQ SHGLNYNRAA DPTMICFYKK PIVVQGADIF GCARGKDPQE MKS------N NVISGSLGDD RLQH--MPAM PVTICVFSYM VKKAVPQ-SH QKRGYTWRAK E-DHLIPVSF CKGEL---DE EPDGAQQ--- ---------- ----RVVQNI VKLLYTKDV- SCNKVLSMLI WQIC--H-GD WEPQIPKACN AARKDLAVCA WKRELVPGL- --NHNNEDLA KIMYFGPDGH -------DEG PMGRKIVNLH V-EMEVSHRE -SSVKDNMIR VVMLPFDNNS WDPWRHVILG LND-QIKLTR PNT---AHRL SILV---VMH YSGGFHDKSS -ERSQHHF-I YNEV----KN DHKCVILASK -SNKGLDWAA GKD-MKGGAK -MMANGALFY -------EER NWIRQKNHFV IFV-VCDAVR ILIVHYGCMV PYTFMAFACN PFFAELFMEH VFIRYQYVNN HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGAKIS HNPRLPEQVI -PPCDLTVSG LKGKH-NGWP YLTDITRTPL IKIHIRIPGS Q-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVDK 
VVD-ELYSKF ---YHAIPKK P--GNM-GIG DSGGMEMVQE DLFQILVCI t62 LIVSEVRVMV RDG--IHIAV DEI-C----- -------LIA NRVKCMSGQG NQARNAMEMA A--RQNFVGM ---RELGKQ- YQ-CMDGQGA VNTEAGNN-S ESAVWIFDIW MCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN CSSLPMPYFL GIDMFHNQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DHEES-H--- HSARC----M R---RIGY-H RVIINYWNVE MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMA QWCSSVIKCG LANHDLE-NV SRLLRCVVID CNHGANHYVQ ISRMKMHGPT YSHMTEYAHG L----WKPFY QASDHKNDAQ GVGER-PHQL PCVEPQTCQY EVKHAKVCKL VHG-ALYLIG LQTVVKPKLG VWEGVQCRSQ LSKLILSQLD D----KTLSK -NCSIY--LG NMTWLATSPL Q------IGC IIMFDGRKRQ SHGLQYNRAA DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD RLQQ--MPAM PVTICVFSYM VKKSVPQ-SH VKRGYTWRAK E-DHLIPVSF CKGEL---EE EPEGAQH--- ---------- ----RMVQNI VKLLYTKDV- SCNVVLSMLI WQIC--H-GD WEPQIPKACD AARKDLAVCA WKRELVPGL- --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE -SSVKDNMIR VIMLPFSNNS WDPWRHVILG LND-KIKLTR PNT---AHRL SILV---VMH YSGGFHDKSA -ERSQHHF-I YNEI----KN DHKCVILASK -ANKPLDWAA GKD-MKGGAK -MMANGALFY -------AER NWIRQKNHFV IFI-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VFIRYQYIPN HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVDR VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME DVFQILVCV t19 LIVSEVRHMV RDG--INIAV DEI-C----- -------LIA NRVKCVSGQG NQARNAMEMA A--RQNFVGM ---RELGKQ- YQ-CMDGQGA VNTEAGNN-S ESAVWIFDIW MCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN CSSLPMPYFL GIDMFHKQVM GKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DHEES-H--- HSARC----M R---RIGYGH LVIINFWNVE MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMA QWCSSVIKCG LANHDLE-NV SRLLRCVVID CNHGANHYVQ ISRMKMHGPT YSHMTEYAHG L----WKPFY QASDHKNDAQ GVGER-THQL PCVEPQTCQY EVKHAKVCKL VHG-ALYLIG LQNVVKPKLG VWEGVQCRSQ LSKLILSQLD D----KTLSK -NCSIY--LG NMTWLATSPL Q------IGC IILFDGRKRQ SHGLQYNRAA 
DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD RLQQ--MPAM PVTICVFSYM VKKLVPQ-SH QKRGYTWRAK E-DHLIPVSF CKGEL---EE EPEGAKQ--- ---------- ----RMVQNI VKLLYTKDV- SCNVVLSMLI WQIC--H-GD WEPQIPKACD AARKDLAVCA WKRELVPGL- --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE -SSVKDNMIR VIMLPFSNNS WDPWRHVILG LND-QIKLTR PNT---AHRL SILV---VMH YSGGFHDKSA -ERSQHHF-I YNEI----KN DHKCVILASK -ANKGLDWAA GKD-MKGGAK -MMVNGALFY -------AER NWIRQKNHFV IFV-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VFIRYQYIPN HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- ----HRLVDK VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME DVFQILVCV t77 LIVSEVRHMV RDG--INIAV DEI-C----- -------LIA NRVKSMSGQG NQARNAMEMA A--RQNFVGM ---RELGKQ- YH-CMDGQGT VNTEAGNN-S ESAVWIFDIW LCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN CSSLPMPYFL GIDMFHRQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH RRVLNVLHQG DHEES-H--- HSARC----M R---RIGY-V LVIINFWNVE MGRLIHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMP QWVSSVIKCG LANHDLE-NV SRLLRCVVID VNQGANHYVQ ISRMKMHGPT YSHMTEYAHG L----WKPFY QASDHKNDAQ GVGER-VHQL PCVEPQTCQY EVKHAKVCKH VHG-ALYLIG LQTVVKPKLG VWEGCQCRSQ LSKLILSQLD D----KTLSK -NCNIY--LG NMTWLATFPL Q------IGC IILFDGRKRQ SYGLQYNRAA DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD RLQQ--MPAM PVTICVFSYM VKKSVPQ-SH QKRGYTWRAK E-DHLIPVSF CKGEL---EE EPEGAQQ--- ---------- ----RIVQNI VKLLYTKDI- SCNVVLSMLI WQIC--H-GD WEPQLPKACD AARKDLAVCA WKRELVPGL- --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE -SSVKDNMIR VIMLPFGENS WDPWRHVILG LND-QIKLTR PNT---AHRL SILV---VMH YSGGFHDNSA -ERSQHHF-I YNEV----KN DHKCVILASK -ANKGLDWAA GKD-MKGGAK -MMVNGALFY -------AER NWIRQKNHFV IFV-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VVIRYQYIPN HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- 
----HRLVDK VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME DVFQILVCA t88 LIVSECRLII RDG--NHDAI DEM-CCVAND LNNEIERLVA SMVKSFRGHD SQARNNSECM R--SAPFIGV ---RELFKR- YHKCVEGAGC VHTVAGTP-S DSPVWMFDQF MCQLTHSMVD --------GD LGRVVQDNVI FAKLKEGAPH CISL--PHFL GIDMFHTQVF VGGH--LILP DPCYELSISV MYAGHASYNQ KRCINNLDQG DQEDSNHRKE HKIRASVLLY R---QIGI-L -VIIKEANEL MNRLNHKEPE NGIIFPLR-- -DAQDPKQI- ---------- -------LNG LFDK-E-E-- ----NRPMVQ DADSVVGS-- --AQWAGQHR SWCSSDDKA- ---------- SQLPRNTHIV VEIGANVYEQ FSRMKTNIPI YAHVTEYAVG V----ERPFY E-SEFKNEAQ GWGES-GTSI PCVDSPDVQY EMKVAWVDKL MHT-ALYLMP LATVHKPEMG TVRGERCRAI L-KLLMMQLD ------TLSR -NQLPK--LC QGTWLDASPL Q------IGV QVMLVGKKGG SKK-EYELAA DQVIIYFYQA PIIYVKADVF SGTVAKKAQA MR-------K STGSQSIGDD GMQS--MPLM QNAVCVWSKM VRKVQPD-GQ DKREQTWMAK D-DTLCPPSM KRGEK---TA EPTQWMG--- ---------- ----TVTVNK IKLLYCKDC- SCNEVMKILS WWLCNSV-GD WQTLMSQACI TADPNPPVCV WKRELVPGL- --NRSVENLA KIIYFCPDEH -------DER KMWGKIFALE W-EMDISHRH -SSVDDNHC- VEMLPFMCQR VDPWGHYVQI LAD-RQDLAR PVT---LQAL SILP---CPH ASGKEQDGAV -ERSQHYV-V YAEL----QV DHKCTIAAHK YQDKMKDWDT GKN-MDEGAK -MYVEIHLFY -------AEN NIRRQNDHSV IFW-ISDAKK RLICGMSSMC LVTFMTLACN PLFAKLFMEV VPMRYDYLTN HDIKI----- -MEVYAPENS WETIKNV-EM VSGKEGAEIN HTPQLPEQVI -PPCNLTVSN LKGMI-PPCP ALTEITRTV- -FIV--MYYA I-LRNKIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGG ---YHAAPKK E--EMV-GPG DNGGVEDIKE DMFQVLVCR t37 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG 
SKK-DYELAA DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV TFV-ISDAQK RLICGVSSMV MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE DMFQVLVCK t35 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTVIPI YARVTEYAVG V----PRPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPQMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIYHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV TFV-ISDAQK RLICGVSSMA MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- 
---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE DMFQVLVCK t48 LIVSDMRLII REG--SDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEVGANVYEQ YSRMKTVIPI YARVTEYAVG V----PRPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIYHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLIKQNEHSV TFV-ISDAQK RLICGVSSMV MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE DMFQVLVCK t55 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH CIAI--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LSTVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV 
QVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH -SSVDDNQC- VELLPYNCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE DMFQVLVCK t46 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV KVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DALCPPCK EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQSLMNDACR SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH -SSVDDNQC- VELLPYVCQR MDPWGHYVQV LAD-RLDLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKQNEHSV TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPDNS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIM--MYVA M-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE 
LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE DMFQVLVCK t67 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVMDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF LGGD--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL MGRLNHKERK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG V----ARPFY D-AEFKNEAQ RQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIHHKADVF AGTVAKKAQA MRS------R SIGSPSVGDD GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWVAK D-DTLCPPCK EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACS SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRM -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLVLTR PVT---LHGL SILP---CPH ASGKEQDGAE -ERSQHYG-V YQEL----QN DAKSTIDANK NPNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKLNEHSV TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE DMFQVLVCK t57 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S ESPVWMFDKF MCQLTHSMVN --------GD LGRVVMDNWT FTKLKEGAPH CIAL--PYFM GIDMFHIQVF IGGD--LILP DPCYELSISV MYAGHASYNQ KRCINGLDNG DRQDSDPQKE HKIRNSVLLY R---QIGL-L -VIIKEANEI MGRLNHKERK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ -NQIPD--LC QRTWLDASPL 
Q------IGV QVVLVGKKRG SKK-DYELAA DPVIIYFYQA PIIHHKADVF AGTVAKKAQA MRS------R SIGSPSVGDD GMQK--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWVAK D-DTLCPPCK DQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- SCNEVLKILA WWLCDSV-GD WQTLMNDACS SANPNYPVCI WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRV -SSVDDNQC- VELLPYLCQR MDPWGHYVQI LAD-RLNLTR PVT---LQGL SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DAKSTIDANK NPNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKQNEHSV TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE DMFQVLVCK t56 LIVSEWRLFI RDG--HDDAI DEM-CCEANE LNNDIEKLVA SMVKGFRGHD SQARNNAECI I--AAPFIGV ---RELFKR- YIKCVEGAGC VYTVA-AP-S ESPVWMFDKF MCHLTHSMVV --------GD LGRVLRDNTT FAKLKEGAPV CISL--PYFL GMDMFHQQVF MGGN--LILP DPCYELSISV MYAGHASYNQ KRCLNNLDQG DREDSSHRKE HKIRRSVLLY Q---QIGC-L -VITRERNEL MGRLNHKEPR DGVIFPHR-- -GAQGPKQL- ---------- -------ANG LFEK-E-E-- ----NRPMVQ DAGSVPER-- --AQWPGQQR AWCSSDDKA- ---------- SKLPRNTKMV AEIGANVYEQ ASRMKTNIPI YAHVTEYALG V----DRPFY D-SEFKNEAQ GQGES-GTSI PCVDSPDVQY ELKHAWVDKL MHT-ALYLMP LPTVHKPKMG TVKGERCRAM L-KLNMMQLD E----KTLSQ -NQIIK--LC QRTWLDASPL Q------IGV NCVLPGKKGG SNK-DYELAA DPVIIYFYQA PIIHHKADVF SGTVAKKAQA MRQ------Q STGSQSVGDD GTQV--MPLM QNLVCVWSKM VRKCMID-GQ EKREQTWMAK D-DKLCPPSQ EQGEK---AA EPTQWED--- ---------- ----ICTANV IKLLYCKDC- SCNEVLRVLS WWLCDSV-GD WRTLMSDACA LANPNPPVCV WKRELVPGL- --NRNVENLA KIIYFCPDEH -------DER KMWGKIFCLE W-EMDISHRH -SSVDDNVC- VEMLPFVCQK MDPWGHYVQI LAD-RLDLTR PVT---LQGL GILV---CPH ASGKEQDGAM -ERSQHYV-V YAEL----QN DHKNTINANK VPRKMLDWDT GKN-MDKGAK GMYHEIVLFF -------AEN NVKKQVEHSV IFI-IADAQK RLICGVSSMC LQTFMNLACN PFFAKLFMEV IPMRYDYQTN HDIKI----- -KEVYAPENS WETIKNV-EM VSGKAGKQIN HEPQLPEQVI -PPCSLTVSN LKGMM-PPCP AMTEITRTV- -III--MYYA I-LRNEIMQY CFYYKENRSV R--------- 
--INNTGQAE LPK------- ---------- ----HHVQNV FID-ELYPGH ---YHAAPKK N--ENV-GPG DNGGVEGIKE DMFQVLVCQ t13 GIVSECRMII RDE--HDDAI DEM-C----- -------LVA SMVKKLSGCE NQARNNHECA I--PPPFHGV ---REMFKRV YE-CMEGIGC VNTVAGNP-S ESSAWMFDKV MYQLTHSMVW --------GD LRRVVHDNVT FSKLKEGAPH CISH--PYFL GIDMFHIQVY SKGF--LTLP DPRYEISMSV MYSQHHSFSM KRCLNGLDHG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKDANSM MGRLNHHESR AVVAFPLR-- -GAEGPKQV- ---------- -------MEG LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-NEWKQEAQ GQGEA-GACI PCVDSKDVQY ELKHAYVKKL MHT-SLYLMH IETCHKPVMG TVKGNRCRAI L-KLSMIQLD Q----KTLSQ -NQRAK--LC QRTWLDTSPL Q------MGM TLVLVGKKVG SKK-DYEVAA DPVIMYFYDA PMIMRPTDVF EGTNNKKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK D-DTLCPQSR DQGEK---HH EPHHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLSSDACM HAEQNPPVCV WQRELVPQL- --NRNIEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRV -SSVDDNYI- IEMLPFVCDR VGPWGHYAQV LAD-QLHLTR PHT---LRDL YILAC--CPV ASGKEQDGNK -ERSQHYT-I YAEI----QN DHKNPISANN QPNKMLDWDT GKT-MDRGAA -MCGEIKLFY -------AEP NVWNQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFSKLFMEV VCMRYNYTSN HSIKI----- -FEVYADENS WENVRNF-EM VSGRAGAHIN HIPQLPEQAY -PPCKLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYGS I-LRNDVMQY VFYYRENRSV R--------- --QNNTGNAE LPK------- ---------- ----HHMQDV FID-ELYLGP ---YHASPKK D--RFC-NPG DNGGMETYQE DMFQVLVCV t5 GIVSECRMII RDE--HDDAI DEM-C----- -------LVA SMVKKLSGCE NQARNNHECA I--PPPFHGV ---REMFKRV YE-CMEGIGC VNTVAGNP-S ESSAWMFDKV MYQLTHSMVW --------GD LRRVVHDNVT FSKLKEGAPH CISH--PYFL GIDMFHIQVY SKGF--LTLP DPRYEISMSV MYSQHHSFSM KRCLNGLDHG DREESPHQIE HKMRKSVLIY S---PIGY-L -VIIKDANSM MGRLNHHESR AIVAFPLR-- -GAEGPKQV- ---------- -------MEG LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-NEWKQEAQ GQGEA-GACI PCVDSKDVQY ELKHAYVKKL MHT-SLYLMH IETCHKPVMG TVKGNRCRAI L-KLSMIQLD Q----KTLSQ -NQRAK--LC 
QRTWLDTSPL Q------MGM TLVLVGKKVG SKK-DYEVAA DPVIMYFYDA PMIMRPTDVF EGTNNKKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK D-DTLCPQSR DQGEK---HH EPHHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLSSDACM HAEQNPPVCV WQRELVPQL- --NRNIEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRV -SSVDDNYI- IEMLPFVCDR VGPWGHYAQV LAD-QLHLTR PHT---LRDL YILAC--CPV ASGKEQDGNK -ERSQHYT-I YAEI----QN DHKNPISANN QPNKMLDWDT GKT-MDRGAA -MCGEIKLFY -------AEP NVWNQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFSKLFMEV VCMRYNYTSN HSIKI----- -FEVYADENS WENVRNF-EM VSGRAGAHIN HIPQLPEQAY -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYGS I-LRNDVMQY VFYYRENRSV R--------- --QNNTGNAE LPK------- ---------- ----HHMQDV FID-ELYLGP ---YHASPKK D--RFC-NPG DNGGMDTYQE DMFQVLVCV t38 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISQ--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHHID HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHHESR LIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHT-SLYLMH IDTCHKPVMG TIKGNRCRAI L-KLTMIQLD Q----KTLSQ -NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKSQPD-GQ DKRGYTWVAK E-DDLCPQSG DQGEK---YA EPQHIRN--- ---------- ----QFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN KLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -FEVYADENS WENIRNF-EM VSGRAGAYIN HIPQLPEQAV -PPCHLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY VFYYRENRIV 
R--------- --QNNTGVAE LPK------- ---------- ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NVG DNGGMETIQE DMFQVLVCI t33 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHYESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SHLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHN-SLYLMH IDTCHKPAMG TVKGNRCRAI L-KLTMIQLD Q----KTLSQ -NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNTPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAK -ERSQCYT-I YAEL----QN DHKSLISANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -FEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY VFYYRENRIV R--------- --QNNTGVAE LPK------- ---------- ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NPG DNGGMETIQE DMFQVLVCV t100 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD Q----KTLSQ 
-NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSE DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -FEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY VFYYRENRIV R--------- --QNNTGVAE LPK------- ---------- ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NPG DNGGMETIQE DMFQVLVCV t34 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD V----KTLSQ -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY 
VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- ----HHMQDI FID-ELYLGP ---YHASPKK Q--RFC-NPG ENGGMETIRE DMFQVLVCV t84 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCD DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD V----KTLSQ -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEHNPPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAK -ERSQCYT-I YAEL----QN DHKSLIAANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- ----HHMQDI FID-ELYLGP ---YHASPKK E--RFC-NPG ENGGMETIRE DMFQVLVCV t92 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD 
V----KTLSQ -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM -SSVDDNYI- IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- ----HHMQDI FID-ELYLGP ---YHASPKK E--RFC-NPG ENGGMETIRE DMFQVLVCV t3 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE QQARNNRECA T--SIPFLGV ---RELWKR- YE-CMEGIGC VNTVAGKP-S QSSVWMFDRF MYKLTHSMVW --------GD LGRVYWDNLT FQKLKEGAPH CIAV--PNYL NIDMFHIQVF YKGP--LTLP DPHYELSMSV MYAQHHSFSQ KRCLNALDHG DREESPHQIE HKMRKSVLLY N---PIGY-L -VIIKNANSM MGRLNHHESC AIVMFPLR-- -GAEGPKQI- ---------- -------IEG LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SRLPRNDPVV HEVGANVYQQ ISRMKTSIPI YAHVTEYAVG I----IKPFY D-NEWKQEAQ GQGEA-GASI PCVNSPDVQY ELKHAHIKKL MHT-SLYLMH LGTCHKPVMG TVKGDRCRAI L-KLSMIQLD Q----KTLSQ -NVRAR--LC QRTWLDTSPL Q------MGM VAVLLGKKLG SKK-DYEIAA DPVIKYFYQA PIIMRRTDVF EGTDDQKAQA MRV------R STASQSIGDD DMLI--MPLV QNAMCVKSEM VRKCQPD-GP DKRGYTWMAK D-DTLCPVSA GQGEK---YA EPHQIKN--- ---------- ----KFSVNV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WVTLASDACL SAEPQPPVCV WQRELVPQL- --NRNVEDLA RVIYFGPDEH -------DEG KMMPKIFKLA W-EMDMSHRA -SSVDDNYV- IEMLPFVCHR VGPWGHYAQV QAD-QQNLTR PHT---LADL FILS---CPV ASGKEQDGAT -ERSQYYV-V YAEL----QN DHKSPISANK HPDKMLDWDT GKS-MDRGAA -MCREIKLFY -------AEP NVWKQNDHSV VFI-ISDAVN RLICANSIMC GLTFMAMACN PHFAKLFMEV VCMRYDYFSN HSIKI----- -FEVYAEDDS WENICNF-EM MSGRTGAQIN HIPQLPEQVC -PPCHLTVSN LKGCC-ASVP AITEITRTV- -VIC--MYVS 
I-LCNVVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGQ ---YHASPKK K--KIC-GPG DNGGMETIEE DMFQVLVCV t22 GIVSECRVII RDQ--SDDAI DER-Y----- -------LVA SVVKRLSGCE NQARNNRECA I--SIPFLGV ---RELWKR- YE-CMEGIGC VNTVAGTP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVYVDNQT FSKLKEGAPH CISA--PYYL NIDMFHIQVF YKGS--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDYG DREESPHQIE HKMRKSVLLY Y---TIGW-L -VIIKNADSM MGRLNHHESC AMVMFPLR-- -GAEGPKQI- ---------- -------MEG LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- ---------- SQLPTNDPIV PEVGANVYQQ VSRMKTSIPI YAHVTEYAVG I----VKPFY D-NEWKQEAQ GQGEA-GASI PCVNSPDVQY ELKHAHIKKL MHT-SLYLMH LKTCHKPVMG TVKGDRCRAI L-KLKMIQLD Q----KTLSQ -NQAAR--LC KRTWLDTSPL Q------MGM NVVLIGQKLG SKH-DYEIAA DPVIMYFYQA PIIMRRTDVF EGTDDQKAQA MRL------R STASRSIGDD DMLI--MPLV QNAMCVNPEM VRKCQPD-GQ DKRGYTWMAK N-DTLCPVSA GQGEC---YA EPHQTCN--- ---------- ----KFRVNV IKLLYCKDC- SCNKVLKILK WQLCESV-DD WQTLASDACI LAKPQPPVCV WQRELVPQL- --NRNVEDLA RVIYFGPDEH -------DEG KMMSKIFNLE W-EMDMSHRT -SSVDDNYV- IDMLPFICHR VGPWGHYAQV LAD-QINLTR PHT---LRDL FILA---CPV ASGKEQDGAT -ERSQHYV-V YAEL----QN DHKSPISANK HPNKLLDWDT GKT-MDRGAA -MCHEIKLFH -------AEP NVWRQNDHSV MFI-ISDASN RLICANSIMC GLTFMAMACN PHFAKLFMEV VCMRYEYFSN HSIKI----- -FEVYAEENS WENVCNF-EM MSGRAGAQIN HIPQLPEQVC -PPCVLTVSN LKGCC-ASCP ANTEITRTV- -VIC--MYVS I-LRNVVMQY VFYYKENRSV R--------- --QNNTGAAE LPK------- ---------- ----HHMQDI FID-ELYPGL ---YHASPKK K--KVC-GPG DNGGMETIQE DMFQVLVCV t64 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE NQTRNNRECA IT-SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAT FSKLKEGAPH VISH--PYFL GIDMFHDQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DTQESPHGIE HNMRNSVLLY N---PIGF-L -VIIKNINSM MGRLNHHESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG HFDK-E-E-- ----ARPIVQ DAASVPGK-- --AQWVGRVR AWCSSDVKA- ---------- SKLPRNDPIV PEVGANVYQQ LSRMKTNIPI YAHVTEYAVG I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAI 
L-KLPMIQLD Q----KTLSH -NQPRR--LC QRTWLDASPL Q------VGM NIMLMGKKAP SKK-DYEIAA DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DMLK--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA NQGEK---YA EPHVIGD--- ---------- ----KVSVNV IKLLYCKDC- SCNQVLKILV WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRV -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN HDIKI----- -FEVYAAENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI VID-ELYPGP ---YHATPKK H--RFC-GTG DNGGMQAIQE DMFQVLVCV t18 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE NQTRNNRECA IT-SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAT FSKLKEGAPH VISH--PYFL GIDMFHEQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DTQESPHGIE HNMRNSVLLY N---PIGF-L -VIIKNINSM MGRLNHHESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG HFDK-E-E-- ----ARPIVQ DAASVPGK-- --AQWVGRVR AWCSSDVKA- ---------- SKLPRNDPIV PEVGANVYQQ LSRMKTNIPI YAHVTEYAVG I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLPMIQLD Q----KTLSH -NQPRR--LC QRTWLDASPL Q------VGM NIMLKGKKAP SKK-DYEIAA DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DMLK--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA NQGEK---YA EPHVIGD--- ---------- ----KVSVNV IKLLYCKDC- SCNQVLKILI WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMFKIFALE W-EMDMSHRV -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN HDIKI----- -FEVYAAENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP AITEITRTV- 
-VIC--MYVS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGP ---YHATPKK H--RFC-GTG DNGGMQAIQE DMFQVLVCV t68 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE NQTRNNRECA I--SPPFIGV ---RELFKR- YG-CVEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAH FSKLKEGAPH VISH--PYFL GIDMFHDQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DTQESPHGIE HNMRNSVLLY N---PIGF-L -VIIKNINSM MGRLNHYESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG HFDK-E-E-- ----ARPIVQ DAASVPEK-- --AQWVGRVR AWCSSDVKA- ---------- SKLPRNDPIV PEVGANVYQQ ISRMKTNIPI YAHVTEYAVG I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLPMIQLD Q----KTLSH -NQPRR--LC QRTWLDASPL Q------VGM NIMLMGKKAP SKK-NYEIAA DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DMLK--MPLR QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA NQGEK---YA EPVVIGD--- ---------- ----KVSVNV IKLLYCKDC- SCNQVLKILV WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRV -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN HDIKI----- -FEVYAVENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGP ---DHATPKK H--RFC-GTG DNGGMQTIQE DMFQVLVCV t28 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE NQARNNRECA I--SPPFIGV ---RELFKR- YH-CMEGIGC VNTVAGMP-S ESSVWMFDQF MYKLTHSMIW --------GD LGRVFPDNAT FSKLKEGAPH VISH--PYFL GIDMFHAQVF YRGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNPLDHG DRQESSHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------VEG HFDK-E-E-- ----ARPIVK DAASVPGK-- --AQWVGRIR AWCSSDVKA- ---------- SQLPRNDPIV PEVGANVYQQ DSRMKTNIPI YAHVTEYACG I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL MHD-SLYLMH LETCHKPIMG 
TVKGDRCRAI L-KLSMIQLD E----KTLSQ -NQRPG--LC QRTWLDANPL Q------MGM NTMLVGKKTP SKK-DYEIAA DPVIMYFYQA PIIMRRTDVF EGTNNKKSQA MRV------Q STSSQSIGDD DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGCTWMAK N-DTLCPQSG NQGEK---FA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- SCNKMLKILV WQLCESV-DD WQTLASDPCV SAEPNSPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRI -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LSD-QIVLTR PHT---LRDL FILA---CRV ASGGEVDGNT -ERSQHYI-V YAEL----QN DHKSPISANK VLNKMLDWDT GKT-MDREAA -MCREIKLFY -------AET NVWKQNDHSV MFI-ISDAQN RLICGNSIMC GITFMAMACN PHFAKLFMEV ACMRYDYFAN PDIKI----- -FEVYAAENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP DITEITRTV- -VIC--MYVS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YHATPKK N--RYC-GPG DNGGMQVIEE DMFQVLVYV t82 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE NQARNNRECA I--SPPFIGL ---RELFKR- YV-CMEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNCT FSKLKEGAPH VISH--PYFL GIDMFHGQVV YRGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNPLDHG DRQESPHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------MEG HFDK-E-E-- ----ARPIVQ DAASVPGE-- --AQWVGHIR AWCSSDIKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI YAHTTEYACG I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHANVKKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLNMIQLD Q----KTLSQ -NQRPG--LC QRTWLDASPL Q------MGM HIMLVGKKTP SKK-DYEIAA DPVIMYFYQA PIIMQRTDVF EGTNNKKSQA MRV------H STSSQSIGDD DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGYTWMAK H-DTLCPQSG NQGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDPCM SAEPNPPVCV WQRELVPQL- --NRNVEDLA RTIYFGPDEH -------DEG KMMSKIYALE W-EMDMSHRV -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LAD-QIVLTR PHT---LRDL FILA---CPV ASGGEQDGNA -ERSQHYI-V YAEL----VN DHKSPISANK VLNKMLDWDT GKT-MDREAA -MCRDIKLFY -------AET NVWKQNDHSV MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV ACMRYDYFAN PDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP 
DITEITRTV- -VIC--MYIS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQVIEE DMFQVLVYV t41 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE NQARNNRECA I--SPPFIGL ---RELFKR- YV-CMEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNCT FSKLKEGAPH VISH--PYFL GIDMFHGQVV YRGS--LTLP DPRYELSMSV MYSQHHSFSQ KRCLNPLDHG DRQESPHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------MEG HFDK-E-E-- ----ARPIVQ DAASVPGE-- --AQWVGHIR AWCSSDIKA- ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI YAHTTEYACG I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHANVKKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLNMIQLD Q----KTLSQ -NQRPG--LC QRTWLDASPL Q------MGM HIMLVGKKTP SKK-DYEIAA DPVIMYFYQA PIIMQRTDVF EGTNNKKSQA MRV------H STSSQSIGDD DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGYTWMAK H-DTLCPQSG NQGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLASDPCM SAEPNPPVCV WQRELVPQL- --NRNVEDLA RTIYFGPDEH -------DEG KMMSKIYALE W-EMDMSHRV -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LAD-QIVLTR PHT---LRDL FILA---CPV ASGGEQDGNA -ERSQHYI-V YAEL----VN DHKSPISANK VLNKMLDWDT GKT-MDREAA -MCRDIKLFY -------AET NVWKQNDHSV MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV ACMRYDYFAN PDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP DITEITRTV- -VIC--MYIS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQVIEE DMFQVLVYV t71 GIVSECRMII RDQ--SDDAI DEMVV----- -------LVA SMVKEMSGCE NQARNNSECA I--SPQFIGV ---RELFKR- YR-CMEGIGC VNTVAGAP-S ESSVWMFDRF MYKLTHSMVW --------GD LARVFHDNGT FAKLKEGAPH VISV--PYFL GIDMFHGQVF LRGS--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNHLDHG DRQESPHGIE HNMRKSVLLY N---PQGY-L -VIIKNVNSM MGRLNHHESR AVVTFPLR-- -GADNPRQI- ---------- -------MEG HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGKVR AWCSSDVKA- ---------- SQLPRNDTIV PEVGANVYQQ VSRVKTHIPI YAHVTEYAVG I----VKPFY N-NEWKQEAQ GQGEA-GASI PCVDSPDDQY ELKHAEVKKL MHD-SLYLPH 
LETCHKPIMG TVKGGRCRAS L-KLKMIQLD Q----KTLSQ -NQQRR--LC QRTWLDSSPL Q------MGM KIMLQGKKTP SKK-DYEIAA DPAIMHFYRA PIIMRRTDVF EGTNNKKAQA MRI------R STASQSIGDD DMLN--MPLH QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSP NQGEK---YA EPHQIRN--- ---------- ----KISVNV IKLLYCKDC- SCNQVLKILV WQLCESV-DD WQTLTSDPCV PAEPNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRD -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDL FILA---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK HMNKMLDWDT GKT-MDFEAA -MCREIKLFY -------AET NVWKINDHSV MFI-VSDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEG VCMRYDYSAN HDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC NPPCHLTVSN LKGVC-ASCP AITEITRTV- -VIV--MWIS I-LRNDVMQY VVYYKENRSV R--------- --QNNTGKAE LPR------- ---------- ----HPMQEI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQSMQE DVFQVLVCV t94 GIVSECRMII RDQ--SDDAI DEMVC----- -------LVA SMVKELSGCE NQARNNRECA I--SPPFTGV ---RELFKR- YR-CMEGIGC VNTVAGIP-S ESSVWMFDRF MYRLTHSMVW --------GD LGRVFQDNAT FSKLKEGAPH VISH--PYFL GIDMFHGQVF YRGS--LTLP DPRYELSMSV MYAQHHSMSQ KRCLNTLDHG DRQESPHGIE HNMRKSVLLY D---PIGY-L -VIIKNVNSM MGRLNHHESR AKVTFPLR-- -GADNPRQI- ---------- -------MEG HFDK-E-E-- ----ARPVVQ DAPGVPEQ-- --AQWVGKMR AWCSSDVKA- ---------- SQLPRNDTIV PEVGANVYQQ VSRVKTNIPI YAHVTEYAVG I----VKPFY N-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL MHD-SLYLPH LETCHKPIMG TVKGGRCRAS L-KLNMIQLD Q----KTLSQ -NQQRR--LC QRTWLDASPL Q------MGM KIMLQGKKIP SKK-DYEIAA DPAIMYFYQA TIIMRRTDVF EGTNNKKAQA MRM------R STASQSIGDD DMLN--MPLH QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLYPQSA NQGEK---YA EPHQIRN--- ---------- ----KISVNV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLTSDPCV PAEPNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRD -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-KLVLTR PHT---LRDL FILA---CYV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPILANK VVNKMLDWDM GKT-MDFEAA -MCREIKLFY -------AET NVWKQNNHSV MFI-VSDAHN RLICGNSIMC ALTFMAMACN PHFAKLFMEA VCMRYDYSAN HDIKI----- -VEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC NPPCHLTVSN 
LKGVC-ASCP AITEITRTV- -VIV--MWIS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPR------- ---------- ----HPVQDI FID-ELYPGH ---YHATPKK V--RFC-GPG DNGGMQAIQE DVFQVLVCV t99 GIVSEVRMII RDE--SDDAI DEM-C----- -------LVA SMVKALSGCE NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S ESSIWMFDRF MYKLTHSMVW --------GD LGQVFPDVST FSKLKEGAPH VISQ--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---PIGY-L -VIIKNINSM MGRLNHHESR AVFAFPLR-- -GAENPKQI- ---------- -------MEG HFDK-EKE-- ----ARPVVQ DAPGVPGK-- --AQWVGRIR AWCSSDVKA- ---------- SELPRNDAIV PEVGANVYQQ VSRMKTNIPI YAHATEYAVG I----VKPFY D-NEWKQEAQ GQGET-GASI PCVDSPDVQY EMKHAEVKKL MHD-SLYLMH IETCHKPIMG TVKGDRCRAT L-KLKMIQLD Q----KTLSQ -NQKRR--LC QRTWLDASPL Q------MGM KMMLQGKKTP SKK-DYEIAA DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRV------R STASQSIGDD EMLN--MPLV QNAMCVPSEM VRKCQPD-GQ DKRGYTWMAK D-DPLCPQSA NVGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- SCNKVLKILV WQLCDSVQDD WQPLTSDPCV NAEPNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRD SSSVDDNYI- IEMLPFVCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDI FILA---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QD DHKSPISANK HMRKMLDWDT GKT-MDREAA -VCREIKLFY -------AET NCWKQNDHSV MFI-VSDAQN RLICGSSIMC RLTFMAMAVN PHFAKLFMED VCMRYDYSPN HDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGR ---YHATPKK N--RYC-GPG DNGGMQPIQE DMFQVLVCV t40 GIVSECRMII RDE--SDDAI DEV-C----- -------LVA SMVKALSGCE NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVYPDNPT FSKLKEGAPH VISH--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---SIGY-I -VIIKNLNSM MGRLNHHESR AVVTFPLR-- -GAEDPKQI- ---------- -------MEG HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGRVR AWCSSEIKT- ---------- SELPRNDAIV VEVGANVYQQ ISRMKTDIPI YAHATEYAVG I----VKPVY D-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL 
MHD-SLYLMH LETCHKPIMG TVKGDRCRAT L-KLKMIQLD Q----KTLSQ -NQKKR--LC QCTWLDACPL Q------MGM KIMLQGKKTP SKK-DYEIAA DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRI------R STASQSIGDD EMLN--MPQV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA NVGEK---YA EPHQIRN--- ---------- ----KVSTNV IKLLYCKDC- SCNKVLKILV WQLCDSV-DD WQTLTSDPCV SAEPNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRN -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRNI FILA---CPV ASGGEQDENT -ERSQHYI-V YAEL----QN DHKSPIHANK VMNKMLDWDT GKT-MDREAA -MCREIKLFV -------AET NVWKQNDHSV MFIVISDAQN RLICGSSIMC RLTFMAMAVN PHFAKLFMEE SCVRYDYSAN HDIKI----- -FEVYANENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC -PPCHLTVSN LKGVC-ASVP AITEITRTV- -VIC--MYIS I-LRNNVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YQATPKK D--KFC-GPG DNGGMQTIQE DMFQVLVCV t90 GIVSECRMII RDE--SDDAI DEV-C----- -------LVA SMVKALSGCE NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVYPDNPT FSKLKEGAPH VISH--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---PIGY-I -VIIKNVNSM MGRLNHHESR AVVTFPLR-- -GAEDPKQI- ---------- -------MEG HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGRVR AWCSSEIKT- ---------- SELPRNDAIV PEVGANVYQQ ISRMKTDIPI YAHATEYAVG I----VKPVY D-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL MHD-SLYLMH LETCHKPIMG TVKGDRCRAT L-KLKMIQLD Q----KTLSQ -NQKKR--LC QCTWLDACPL Q------LGM KIMLQGKKTP SKK-DYEIAA DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRV------R STASQSIGDD EMLN--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA NQGEK---YA EPHQIRN--- ---------- ----KVSTNV IKLLYCKDC- SCNKVLKILV WQLCDSV-DD WQTLTSDPCV SAEPNPPVCV WQRELVPQL- --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRN -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRNI FILA---CPV ASGGEQDENT -ERSQHYI-V YAEL----QN DHKSPINANK VMNKMLDWDT GKT-MDREAA -MCREIKLFV -------AET NVWKQNDHSV MFIVISDAQN RLICGSSIMC GLTFMAMAVN PHFAKLFMEE SCVRYDYSRN HDIKI----- -FEVYANENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC 
-PPCHLTVSN LKGVC-ASVP AITEITRTV- -VIC--MYIS I-LRNNVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YHATPKK D--KFC-GPG DNGGMQTIQE DMFQVLVCV t4 GIVSECRMII RDE--PDDAI DEV-C----- -------LVA SMVKELSGCE NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNAT FSKLKEGAPH VISV--PYFL GIDMFHDQVF YRGS--LTLP DPRYEMSMSV MYAQHHSFSQ KRCLNALDYG DRQESPHGIE HNMRKNVLLD N---PIGH-L -VIIKNENSM MGRLNHHESR AVVTFPLR-- -GAEDPKQK- ---------- -------MEG SFDK-E-E-- ----ARPVVQ DAPGVPGR-- --AQWVGRVR PWCSSDVKA- ---------- SELPRNDAIW PEVGANVYQQ ISRMKTNIPI YPHVTEYAVG I----VKPFY L-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL MHD-SLYLWH LETCVKPIMG TVKGDRCRAT L-KLKMIVLD P----KTLSQ -NQKRR--LC QRTWLDASPL Q------VGM KIMLQGKKTP SKK-DYEIAA DPVIMYFYQS PIIMRHTDVF EGTNNKKAQA MRL------R STASQSIGDD EMLN--MPLI QNAMCVQSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQLH NQAEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- SCNKVLKILV WQLCESV-DD WQTLTSDPCI SAEPNPPVCV WQRELVPQL- --NRNIEDLA RIIYFGPDEH -------DEG KMMGKIFALE W-EHDMSHRD -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDI FILA---CPV ASGGEQDAVT -ERSQHYI-V YAEL----QN DHKSPISASK HMNKMLDWDT GKT-MDREAA -MCREIKLFY -------AET NVWKQNDHSV MFI-ISDAQN RLICGSSIMC GLTFMATACN PHFAKLFMEE VCMRYDYAAN HDIKI----- -SEVYAEENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVV -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNVVMQY VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- ----HHMQDI FID-ELYPGH ---YHATPKK N--RCC-GPG DNGGMQTVQE DMFQVLVCV t36 MIWSEVRKMV RER--REQAI DNK-C----- -------LIA ARVKMMSGYV LQVRNALEDA L--YVFFTGT KGTRELVKY- YM-CMEGHGC VNTMAKKS-S QSGIWKFNAF MC-LMHLMEG --------GD L--------- ---------- -CSY--PSFL GMCMFHAMVQ SKGD--LVLP DSPNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSEHIKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNHIEPG AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----DRPGIQ NAVSVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYRIN RKVGANAYDQ DSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNK PCVESEDCQY 
EKKHASMDKL MHQ-SLYLMH INIMSKPAMG EWVGNRCRNE LTALRIIQLD VGVSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC GVMSVEKKEA SPK-EFEVAA DPTVIYFYRN LIIQKITDVL SAVRMESPQE MRT------Q DVNSSSLGDN NGQA--MNIV PYWVCVASGV VRKTHKD-SV DKRGQTWTAK S-DFLCPLAV DQGEP---GA EQKPAVGEEN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLYADSCP IA-VNAAVCG WKRELVPGL- --NHSCEHLA KSVYFEPDGE -------GEG KVMLKIFGLD WCEVERSHEH -SSVDDNYN- VNMLPFQNSR KDPVGHYVQD LED-ARRLIR PGT---ARSL TILF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEH----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -IILHGSLFF -------AEP NIVRQPGVSH IFI-GNDARR VLICGKSMMP GHRFMREACV PFFHKLFMAV NQMRYDYMMN YDIKIYETHW RMGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPEQVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IKIY--MWGT INFKNRVMQY CNYYKENRSV R--------- --INNTGLAE LPK------- ---------- ----YHHQNI LID-ELYSNV ---YPAAPKK H--QYM-GVG DVGGYEVICE NLFQILVVE t87 MIWSEVRKMV RER--REQAI DNK-C----- -------LIA ARVKMMSGYM LQVRNALEDA L--YVFFTGN KGTRELVKY- YM-CMEGHGC VNTMAKKS-S QSGIWKFNAF MC-LMHLMEG --------GD L--------- ---------- -CSY--PSFL GMCMFHAMVQ SKGD--LVLP DSPNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSDHVKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNHIEPG AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYRIN RKVGANAYDQ DSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNK PCVESEDCQY EKKHASMDKL MHQ-SLYLMH INVMSKPAMG EWVGNRCRND LTALRIIQLD VGHSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC LVMSVEKKEA SGK-EFEVAA DPTVIYFYRN LIIAKITDVL SAVRMESPQE MRE------Q DVNSSSLGDN NGQA--MNIV PYWVCVGSGV VRKTHKD-SV DKRGQTWTAK S-DFLCPLAM DQGEI---GA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLYADSCP IA-INAAVCG WKRELVPGL- --NHSCEHLA KSVYFEPDGE -------GEG KVMLKIFGLD WCEVERSHEH -SSVDDNYN- VNMLPFQNSR KDPVGHYVQD LED-ARRLIR PGT---ARSL TILF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEH----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -VILHGTLFF -------AES NIVRQPGVSH IFI-GNDARR VLICGLSMMP GHRFMREACV PFFHKLFMAV NQMRYDYMVN YDIKIYETHW RMGVYALDNS WETLNVS-EM TSGRIGAKIN 
HLPRLPEQVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IKIY--MWGT ICFKNRVMQY CNYYKENRSV R--------- --INNTGLAE LPK------- ---------- ----YHHQNI LID-ELYSNV ---YPAAPKK H--QYM-GVG DVGGYEMICE NLFQILVVE t89 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV DQGEP---AA EQRPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH -SSIDDNYT- VNMLEFPNSR KDPVGHYVQN LED-AIRLIR PGT---ARSL TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYVTN YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE NLFQILVVE t39 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNV 
PCVESEDCQY EKKHASMTKL MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV DQGEP---AA EQRPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH -SSIDDNYT- VNMLEFPNSR KDPVGHYVQN LED-AIRLIR PGT---ARSL TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYVTN YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE NLFQILVVE t8 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL MHQ-SLYLMH VNIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV DQGEP---AA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH -SSIDDNYT- VNMLPFPNSR KDPVGHYVQD LED-AIRLIR PGT---ARSL TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAA NQMRYDYVTN YDIKIYETHW RVGVYAIDNS WETLNVS-EM 
TSGRMGAKIN HLPRLPEHVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYNDI ---YPAAPKK H--KFL-GVG DVGGYEIICE NLFQILVVE t6 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- -CSH--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG LANHILVHEP SKLHKTYKVV RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV DQGEP---AA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHER -SSIDDNYT- VNMLPFPNSR KDPVGHYVQD LED-AIRLIR PGT---ARSL TILL-YGCQY YSGEFQDCEV -ERSQCYN-I YCEL----KQ DHKSAIIANK QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH IFI-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYMTN YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE NLFQILVVE t58 MIWSEYRHMV REG--RDQAI DNK-C----- -------LIA ARVKVMSGYV LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGC VNTMAKKS-S QSGIWKFNAF MC-LMHLMQG --------GD L--------- ---------- -CSH--PSFL GMCMFHAMVK SKGN--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNHLNVG DREDSEHIKE KHMRV----Y A---SIGG-H -IIIREWNDV MGRLNHIEPG AEVTFPLRR- -RGQASKPV- ---------- -------IDG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --DQWVGSVR GWCSSQHRYG LAIHILVHQQ SRLHKTYNID RKNGANAYEQ DSRMKAGAPV YPHIMEYAHG M----FNPFY GMSEPKNNAQ 
GNGEN-PMNV PCVESDDCQY EKKHASMDKQ MHQ-SLYLMH MNIMSKPAMG EWVGNRCRNE LTALHVVQLD VGFSGKTLGQ -NIGISELLN DRTWLATSPL E------IGC GVMAVEKKEA SPK-EFEVAA DPTVIYFYRN LIIQHITDVL SAVRMDSPQE MRV------Q DVNSPSLGDN NGQA--MNIV PYWVCVVSGV VRKTHKD-SV EKRGQTWTAK S-DFLCPIAV NQGEP---GA EQKPAVGA-N PIVKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFADSCP TA-VNAAVCA WKRELVPGL- --NHSCEHLA KSVYFEPDGE -------DEG KMMLKIFGLD WCEVERSHEH -SSVDDNYN- VNMLPFPNSR QDPVGHYVQD LED-LLRLIR PGT---ARSL TVLF-YGCQY YSGQFQDCEI -ERSQLYN-V YCEL----KQ DHKSAIMANK QEQKGMDWNT GKE-MEQGPK -IILHGVLFF -------AEP NVVRQPGVSH IFV-GNDARR MLICGLSMMP GHRFMKEACV PFFHKLFMAV RRMRYDYMSN YDIKIYETHW RPGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPETVI -PPCVLTVSG LKDTM-AGMK HGTEITKTPD IKIH--MWGT INFKNKVMQY CTYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYSNI ---YPATPKK H--QYM-GVG DVGGYEVICE NLFQILVVE t54 MIWSEYRHMV REG--RDQAI DNK-C----- -------LIA ARVKVMSGYV LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGC VHTMAKKS-S QSGIWKFNAF MC-LMHLMQG --------GD L--------- ---------- -CSH--PSFL GMCMFHAMVQ SKGN--LVLP DSCNEDSVSF MYIQHHSFHE KRCLNHLNVG DREDSEHIKE KHMRI----Y A---SIGG-H -IIIREWNEV MGRLNHIEVG AEVTFPLRR- -RGQASKPV- ---------- -------IDG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --EQWVGSVR GWCSSQHRYG LAIHILVHQQ SRLHKTYNID RKNGANAYDQ DSRMKAGAPV YPHIMEYAHG M----FVPFY GMSEPKNNAQ GNGEN-PMNV PCVESDDCQY EKKHASMDKQ MHQ-SLYLMH MNIMSKPAMG EWVGNRCRNE LTALHIVQLD VGFSGKTLGQ -NIGISELLN DRTWLATSPL E------IGC GVMAVEKKEA SPK-EFEVAA DPTVIYFYRN LIIQHITDVL SAVRMDSPQE MRV------Q DVNSPSLGDN NGQA--MNIV PYWVCVVSGV VRKTHKD-SV EKRGQTWTAK S-DFLCPIAV NQGEP---GA EQKPAVGA-D PIVKPWQFVI MNTTTFAKNI FRLLYVKDV- SCNGVLPLLK WQLCDSH-GD WQSLFADSCP IA-VNAAVCA WKRELVPGL- --NHSCEHLA KSVYFEPDGE -------DEG KMMLKIFGLD WCEVERSHEH -SSVDDNYN- VNMLPFPNSR KDPVGHYVQD LED-LLRLIR PGT---ARSL TVLF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEL----KQ DHKSAIMANK QEQKGMDWNT GKE-MEQGPK -IILHGVLFF -------AEP NVVRQPGVSH IFV-GNDARR MLICGLSMMP GHRFMKEACV PFFHKLFMAV RRMRYDYMTN YDIKIYETHW RPGVYALDNS 
WETLNVS-EM TSGRIGAKIN HLPRLPETVI -PPCVLTVSG LKDTM-AGMK HGTEITKTPD IKIH--MWGT INFKNKVMQY CTYYKENRSV R--------- --INNTGGAE LPK------- ---------- ----YHHQNI LID-ELYSNI ---YPATPKK H--QYM-GVG DVGGYEVICE NLFQILVVE t65 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA DRVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- -CNS--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVMFPLRK- -GGQDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVKCG-- --DQWVGSIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY EHKHAAMEKL MHH-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD VGSSGKTLGQ -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRTPVLVCIQ KVDSVSLGDD DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPAAV NEGEH---GA EQRP-FGTQN PVCKPWRFVI MNTQTFAKNI IRLLYIKDI- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP NA-VNIAVCC WKRELMPGL- --NHSCEHLA KSVYFKPDGE -------DEG QMTMKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DYKSAIIANQ QEHKGMEWDT GKE-MQQGPK -VVVHLALFY APSNVLAAEP NIIGQPQVSH VFV-GNDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYMWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT -PPCILTVSG LKRTV-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDV ---YPAAQKK Y--QVM-GVG DIGGYELICE DLFQILVCQ t29 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- -CNC--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLFVG DSQDSQHQRE KQLRI----Y G---RIGK-V -VIIKVWNAA MGRLNHVEPP AEVMFPLRR- -GGSDSKPF- ---------- -------LDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILIHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG M----VKPFY 
GQSEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMPKPVMG DWDGNRCRAE LTALQTVQLD VGSSGKTLGQ -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRTPVLVCIQ KVDSVSLGDD DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV NEGEH---GA EQRPVFGTHN PVCKPWRFVI MHTQTFSKNI IRLLYIKDI- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP NA-VNIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMSQKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LEDSALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DYKSAIIANQ QDHKGMEWDT GKE-MQQGPK -VVVHLALFY MPSNVLAAEP NIIGQPQVSH WFV-GNDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAD NQVRYDYMWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT -PPCILTVSG LKRTV-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDL ---YPAAPKK Y--QEM-GVG DIGGYELIVE DLFQILVCE t98 MIWSEHRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTKELVKF- YF-CMEGCGC VNTMAKKS-S QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- -CNS--PPFL GMCMFHPQVQ MKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVMFPLRR- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILVHNP SRLHRTYAIN KKMGANVYQQ SSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD VGSSEKTLGQ -NIVTSKRLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA D---IYFYRN MVVQRHTDVV AAVRMQSPQE MRNPVLVCIQ KVDSASLGDD DPQM--MKIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNTQTFSKNI IRLLYIKDI- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP PA-VNIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMTMKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEM FERSQLYR-V YCEI----KK DYKSAIIANQ QEHKGMEWDT GKE-MQQGPK -IVVHLGLFY MPSNVLAAEP NIIGQPQVSH VFV-ANDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN YDIKIYETHF 
RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT -PPCDLTVSG LKRTV-AGPP DETEITKSPV IHIV--MWAD IVFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNL LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELIVE DLFQILVCE t83 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTKELIKF- YF-CMEGCGT VNTMAKDS-S QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- -CNS--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVMFPLRR- -GGSCSKPF- ---------- -------VDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILIHNP SRLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD LGSSGKTLGQ -NIVTSKKLG DRTWLAASPL Q------IGC GIMVQEKKIA SPQ-EVEVAA D---IYFYRN MVVQRHTDVV AAVRMQSPQE MRNPVLVCIQ KVDSISLGDD DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV NEGEH---GA EQRPVFGTQN PVCRPWRFVI MNMQTFSKNI IRLLYIKDI- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP PA-VNIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMTMKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSDM FERSQLYR-V YCEI----KK DYKSAIIANQ QEHKGMEWDT GKE-MQQGPQ -IEVHLALFY MPSNVLPAEP NIIGQPQVSH VFV-GNDARR MLIVGVSLMP AYKFMREACV PFFRKLFMAE NQVRYDYMWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT -PPCILTVSG LKRTI-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNL LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELIVE DLFQILVCE t95 MIWSEYRQMV REG--TDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQL --------GD L--------- ---------- -CNP--PSFL GMCMFHPQVR SKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYTG DSQDSQHLRE KQLRI----Y G---RIGR-V -VILQVWNAV MGRLNHHEPP AEVMFPLRK- -GGSNSKPV- ---------- -------YDG FF-------- ----DRPAIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN RKVGANVYQQ TSRMKAAAPL YPHIMEYAHG 
M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECVY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGHRCRAE LTELQTVQLD VGSSGKTLGQ -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-QVEVAA D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRCRKLVCIQ KVDSPSLGDD DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV NEGEH---GA EQRPVFGGQN PACKPWHFVI MNRQTFAKNM IRLLYIKDI- SCNEVLQLLD WQLCDSH-GD WQSLIADSCP NA-SNIAVCC WKRELMPGL- --NFSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK -SSVDDNYD- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL IILF-YGCQY YSGKFQDSEL FERSQLYR-M YCEI----KK DHKSAIIANQ QEHKGMEWDT GKE-MQQGPK -VVLHHALFY APSNVLAAEP NIIGQPQVSH VFV-GNDARR MLITGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN YDIKIYETHY RMGVYAVDNS WETLVYC-EM TSGRIGAKVN HLPRLPEQVT -PPWVLTVSG LKRTV-AGAP DETEITKSPI IHII--MWEE IMFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELICE DLFQILVCE t20 MIWSEYRQMV REG--TDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQL --------GD L--------- ---------- -CNP--PSFL GMCMFHPQVR SKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGR-V -VILQVWNAV MGRLNHHEPP AEVMFPLRK- -GGSNSKAV- ---------- -------YDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN RKVGANVYQQ MSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECVY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGHRCRAE LTELQTVQLD VGSSGKTLGQ -NIVTSKKLG DRTWLAAPPL Q------IGC DVMVQEKKIA SPQ-QVEVAA D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRTPKLVCIQ KVDSPSLGDD DPQC--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV NEGEH---GA EQRPVFGGQN PACQPWHFVI MNTQTFAKNM IRLLYIKDI- SCNQVLQLLD WQLCDSH-GD WQSLIADSCP NA-VNIAVCC WKRELMPGL- --NFSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK -SSVDDNYD- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL IILF-YGCQY YSGKFQDSEL FERSQLYR-M YCEI----KK DHKSAIIANQ QEHKGMEWDT GKE-MQQGPK -VVLHHALFY APSNVLAAEP NIIGQPQVSH VFV-GDDARR MLITGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN 
YDIKIYETHY RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT -PPWVLTVSG LKRTV-AGAP DETEITKSPI IHII--MWED IMFKNCVMQY CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELICE DLFQILVCE t69 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGH KGTRELVKY- YH-CMEGCGC VNTMAKSS-S QSAPWKFNRF MC-LMHLMQE --------GD L--------- ---------- -CNS--PGFL GMCMFHPQVQ RKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV MGRLNHVEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ ISRMKALAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ -NIMTSKKLG DRTWLAANPL Q------IGR GVMVWEKKVA SPQ-EVEVAA D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNTETFAKNI IRLLYVKDV- SCNQVLQLLD WQLCDSH-GD WQSLVADSCP NA-VNIAVCC WKRELMPGL- --NHSCQHLA KTVYFKPDGE -------DEG QMIVKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV VFV-GNDGRR MLIVGCSMMP DYKFMREACV PFFRKLFMAE VQDRYDYAWN YNIKIYETHF RDGVYAVDNS WETLVYV-EM ASGRIGAKIN HQPRLPEQVI -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE DLFQILVCE t2 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELVKY- YH-CMEGCGC VNTMAKSS-S QSAPWKFNRF MC-LMYLMQD --------GD L--------- ---------- -CNS--PGFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV MGRLNHHEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGSIK GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ ISRMKALAPL 
YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ -NIMVSKKLG DRTWLAASPL Q------IGC GVMVWEKKVP SPQ-EVEVAA D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNVETFAKNI IRLLYVKDV- SCNQVLQLLD WQLCDSH-GD WQSMVADSCH NA-VNIAVCC WKRELMPGL- --NHSCQHLA KTVYFKPDGE -------DEG QMMVKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV VFV-GNDARR MLIVGCSMMP DYKFMREACV PFFRKLFMAE VQDRYDYNWN YNIKIYETHF RVGVYAVDNS WETLVYV-EM ASGRIGAKIN HMPRLPEQVI -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE DLFQILVCE t10 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YNFFTGQ KGTRELVKY- YH-CMEGCGC VNTMAKSS-S QSAPWKFNRF MC-LMYLMQD --------GD L--------- ---------- -CNS--PGFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV MGRLNHHEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGSIK GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ ISRMKALAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ -NIMVSKKLG DRTWLAASPL Q------IGC GVMVWEKKVP SPQ-EVEVAA D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNVETFAKNI IRLLYVKDV- SCNQVLQLLD WQLCDSH-GD WQSMVADSCH NA-VNIAVCC WKRELMPGL- --NHSCQHLA KTVYFKPDGE -------DEG QMMVKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV VFV-GNDARR MLIVGCSMMP DYKFMREACV PFFRKLFMAE 
VQDRYDYNWN YNIKIYETHF RVGVYAVDNS WETLVYV-EM ASGRIGAKIN HMPRLPEQVI -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE DLFQILVCE t31 MIWSEYRSMV REG--ADQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKQS-S QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- -CNL--PSFL GMCMFHNQVQ CKGE--LVLP DNVNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRI----Y Q---NIGK-V -VIIKVWNAN MGRLNHHEPP AEVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMNVPCG-- --DQWVGCIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPI YPHIMEYAHG M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTVQLD VGSNGKTLGK -NIMTPKKLG DRTWLAASPL Q------IGC GVMVQEKKVA SPQ-EVE-AA D---IFFYRN MVIQRLTDVV AAVRMQSPQE MRPPVLVCIQ YVDSPSLGDD TPQM--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK A-DFLCPPAV SEGEH---GA EQRPVFGMQN PACKPWRFVI MNTQTFAKNI IRLLYIKDV- SCNAVLQLLD WQLCYSH-GD WQSLIADSCP TA-ANIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK -SSVDDNYH- VNMLPFAQSN RDPVGHYVQG LED-ALRLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ HEVKGMDWNT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIVGQPQVSH VFV-GNDARR MLIVGVSVMP SYKFMREACV PFFHKLFMAD NQVRHEYMWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGANIN HLPRLPEQVV -PPCILTVSG LKRTI-AGSP DETEITKTLI YHIV--MWAD IMFKNCVMQY STYYKENRSV R--------- --ANNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRVM-GVG DIGGYEMICE DLFQILVCE t15 MIWSEYRSMV REG--SDQSI DNH-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- -CNS--PSFL GMCMFHNQVP NKGA--LVLP DNVNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRI----Y P---SIGK-V -VIIKVWNAA MGRLNHHEPP AEVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGCIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN RKMGANVYQQ 
TSRMKAAAPL YPHIMEYAHG M----VEPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMSKPVLG DWVGNRCRAE LTALKTVQLD IGNSGKTLGQ -NIMTPKRLG ERTWLAASPL Q------IGC GVVVQEKKVA SPQ-EVEVAA D---IYFYRN MVIQTLTDVV AAVRMQSPQE MRPPVLVCIQ DVDSVSLGDD TPQM--MNIA PYWVCVCSGV VKKTQPD-SV DKRGKTWVAK S-DFLCPPAV SEGEH---GA EQRPVFGMRN PICKPWRFVI MNTQTFAKNI IRLLYVKDV- SCNAVLQLLD WQLCYSH-GD WQSLIADSCS TA-ANIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMIVKIFHLD WCEVEKSHEK -SSVDDNYH- VNMLPFAQSN GDPVGHYVQG LED-ALRLIR PGT---ARAL VILS-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIVANQ QEVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIVGQPQVSH VFV-GNDARR MLIVGVSVMP AYKFMREACV PFFHKLFMAE NQVRHDYMWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVV -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY STYYKENRSV R--------- --GNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDV ---YPTAPKK YVIRVM-GVG DIGGYELICE DLFQILVCQ t63 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKLMSGVV RQVRNAPEDA L--YTFFTGQ KGKRELIKY- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQD --------GD L--------- ---------- -CNS--PSFL GMCMFHNQVS FKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DEQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV MGRLNHHEPP AAVMFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGIIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKTVQLD VGSSGKTLGQ -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD TPQK--MNIA PYWVCVCSGV VKKAQLD-SV EKRGKTWVAK S-DFLCPPAV SEGEH---GP EQRPVFGVQN PVCKPWRFVI MNTQTFAANI IRLLYVKDV- SCNAVLQLLD WQLCYSH-ED WQSLIADACP TA-VNIAVCC WKRELMPGL- --NHSCEHLA KSVYFKPDGE -------DEG QMFMKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFVQSN GDPVGHYVQG LED-ALLLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ QGVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIIGQPQVSH VFV-DNDARR MLIVGQSVMP PYKFMREACV 
PFFHKLFMAE NQVRHDYVWN YDIKIYETHF RMGVYAIDNS WETLVYC-EM TSGRIGAKIN HQPRLPEQVV -PPCVLTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE DLFQILVCE t50 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKLMSGVV RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S QSAPWKFNHF MC-LMHLMQN --------GD L--------- ---------- -CNS--PSFL GMCMFHNQVS VKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVMFPLRK- -CGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGVIR GWCSSQHQYG LANHVLVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKTVQLD VGSSGKTLGQ -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD TPQK--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAV SEGEH---GP EQRPVFGVQN PVCKPWRFVI MNTQT-AKNI IRLLYVKDV- SCNAVLQLLD WQLCYSH-GD WQSLIADACS TA-VNIAVCC WKRELMPGL- --NHSVEHLA KSVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK -SSVDDNYN- VNMLPFVQSN GDPVGHYVQG LED-ALLLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIMANQ QGVKGMDWDT GKE-MQQGPK -IVMHLALFY APSNVLAAEP NIIGQPQVSH VFV-DNDARR MLIVGQSVMP AYKFMREACV PFFHKLFMAE NQVRHDYVWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HQPRLPEQVV -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE DLFQILVCE t25 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKMMSGVV RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- -CNS--PSFL GMCMFVNQVS FKGE--LVLP DNPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVVFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGVIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN 
KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKIVQLD VGSSGKTLGQ -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD TPQK--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAV SEGEH---GP EQRPVFGMQN PVCKPWRFVI MNTQTFAKNI IRLLYVKDV- SCNAVLQLLD WQLCYSH-GD WQSLIADACP TA-VNIAVCC WKRELMPGL- --NHSCEHLA KSVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK -SSVDDNYD- VNMLPFVQSN GDPVGHYVQG LED-ALLLIR PGT---ARAL VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSTIIANQ QGVKGMDWDT GKE-MQQGPK -IVVHLTLFY APSNVLAAEP NIFGQPQVSH VFV-DNDARR MLIVGQSVMP AYKFMREACV PFFHKLFMAE NQVRHDYVWN YDIKIYETHF RMGVYAVDNS WEALVYC-EM TSGRTGAKIN HQPRLPEQVV -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE DLFQILVCE t51 MIWSEYRSMV REG--SDQSI DNK-C----- -------LIA ARVKMMSGVV RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- -CNS--PSFL GMCMFHVQVQ FKGE--LVLP DHPNENSVEF MYRRHHSLSD KRCLNLLYVG DNQDSQHLRE KQLRI----Y A---RIGK-V -VIIKVWNAV MGRLNHHEPP AEVVFPLRK- -GGSDSKPF- ---------- -------IDG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGVIR GWCSSQHQYG LANHILVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL MHQ-SLYLMR VNTMSKPIMG DWVGNRCRAD LTALKTVQLD VGSSGKTLGQ -NIMTPKNLG DKTWLAASPL Q------IGC GIMVQEKKVA SPQ-EVEVAA D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRPPVLVCIQ DVDSPSLGDD TPQM--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAI SEGEH---GA EQRPVFGMQN PVCRPWRFVI MNTQTFAKNV IRLLYVKDV- SCNKVLQLLD WQLCYSH-GD WQSLIADACP AA-VNIAVCC WKRELMPGL- --NHSCEHLA KTVYFKPDGE -------DEG QMIMKIFGLD WCEVEKSHEE -SSVDDNYN- VNMLPFVSSN GDPVGHYVQG LED-ALRLIR PGT---ARAL VILF-YGCQF YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ QGVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIIGQPQVSH VFV-DNDIRR MLIVGVSVMP 
AYKFMREACV PFFHKLFMAE NQVRHDYVWN YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVF -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY ATYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIREM-GVG DIGGYELICE DLFQILVCE t9 MIWSECRQMV REK--TDNAI DNQ-C----- -------LIA ARVKENSGHV KQVRNDSEDI L--YNHFTGK KGARELIKR- YV-CMEGHDC VNTVANVS-S QSAIWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSY--PSFL PMCMFVPQVL SKGE--LVLP DSSNEESISF VYLQHHSFSV KRCLNYLNVG DSADSKHLKE KHLRA----E GMPGNIGK-H -VIINVWNAV MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG LASHTLSHDP SSLHRTFAIN RKMGANVYQQ TSRMKASAPF YPHCMEYAHG V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ENKHATMTKL MHH-SLYLMR MHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ -NVVVPHLLG HRTWLATSPL Q------IGC GVMIFGNKIG SAN-EFEAAA DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DNQA--MVIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK V-DFLCPPAF NQGEH---RA EQKPASKFEH PVCGPWEFVI VNYQTSAKDI ILLLYIKDV- GCNCVLELLN WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- --NHSCERLA KHIYFQPDGE -------DEG KMVLKIFGLD WCEMEKSHQR -SGVDDNYQ- VNMLPFNHSK NNPVGHYVQG LED-ALNLIR PGT---ARAL NILF-YGCEV YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIIAHK YEHKGMDWDT GKE-AQQGPE -VVHHNLLFF -------AEP NIHGQPGVGH IFV-GNDARR KLIAGVSFMS MVKFMCEACV PFFRKLFMAV GQMRYDYVNN YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC -PPCILTVSG LKETK-AGLI DGTEITKTPD IGIC--MWET IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAA LPK------- ---------- ----YHHQNT LVD-ELYSDV ---YPAAPKK K--VCM-GVG DVGGYEVMGE DLFQILVCE t47 MIWSECRQMV REK--SDNAI DNQ-C----- -------QIA ARVKRNSGHV KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- -CSY--PSFL PMCMFVSQVL SKGE--LVLP DSSNEESISF VYLQHHSFSD KRCLNYLNVG DNSDSKHLKE KHLRA----Q GMPGNIGK-H -VIINVWNAV MGRLNHHESP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG LASHTLSHDP 
SNLHRTFAIN RKMGANVYQQ ASRMKASAPF YHHCMEYAHG V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESRDCQY ENKHATMTKL MHQ-SLYLMR MHTMSKPIMG HVNGNRCRAD LTALKMLQLD IGFSAKTLCQ -NIVVAHLLG HRTWLATSPL Q------IGC MVMIFGNKIG SAN-EFEAAA DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRE------Q DVDSHSLGDD DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK M-DFLCPPAF NQGEH---RA EQKPALKFKH PVCGPWEFVI VNYQTSAKDI ILLLYIKDV- GCNCVLELLK WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- --NHSCDRLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR -SGVDDNVQ- VNMLPFNHSK HNPVGHYVQG LED-ELNLIR PGT---ARAL NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMACK YRHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIQGQPGVGH IFV-GNDARR KLIAGLSFMA MMKFMCEACV PFFRKLFMAV GHMRYDYVSN YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC -PPCILTVSG LKETK-AGII DGTEITKTPE IGIC--MWDT IEFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNT LVD-ELYSDI ---YPAAPKK K--VCM-GVG DVGGYEVMGE DLFQILVCE t60 MIWSECRQMV REK--TDNAI DNQ-C----- -------QIA ARVKENSGHV KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- -CSV--PSFL PMCMFVSQVL HKGE--LVLP DSSNEESISF VYLQHHSFSD KRCLNYLNVG DNADSKHLKE KHLRA----Q GMPGNIGK-H -VIINVWNAV MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG LASHTLSHNP SNLHRTFAIN RKMGANVYQQ TSRMKASAPF YHHCMEYAHG V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ESKHAAMTKL MHQ-SLYLMR IHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ -NIVVPHCLG HRTWLATSPL Q------IGC MVMIFGNKIG SEN-EFEAAA DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DNQA--MNIL PYVVCVCSGV VRKDHPDTSI HKRGRTWLAK V-DFLCPPAF NQGEH---RA EQKPAFKFKH PVCGPWEFVI VNYQTEAKDI ILLLYIKDV- GCNCVLELLE WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- --NHSCERLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR -SGVDDNYQ- VNMLPFNHSK HNPVGHYVQG LED-ALNLIR PGT---ARAL NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMAYK FEHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIQGQPGVGH IFV-GNDARR 
KLIAGVSFMA VMKFMCEACV PFFRKLFMAV GQMRYDYMNN YDIKIWETFF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC -PPCILTVSG LKETR-AGII EGTEITKTPD IGIC--MWDT IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNT LVD-ELYSDI ---YPAAPKK K--VCM-GVG EVGGYEVMGE DLFQILVCE t30 MIWSECRQMV REK--TDNAI DNQ-C----- -------QIA ARVKENSGHV KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- -CSV--PSFL PMCMFVSQVL HKGE--LVLP DSSNEESISF VYLQHHSFSE KRCLNYLNVG DNADSRHLKE KHLRA----Q GMPGNIGK-H -VIISVWNAV MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------MDG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG LASHTLSHTP SNLHRTFAIN RKMGANVYQQ TSRMKASAPF YHHCMEYAHG V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ENKHATMTKL MHQ-SLYLMR MHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ -NIIVPHCLG HRTWLATSPL Q------IGC MVMIVGNKIG SEN-EFEAAA DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DNQA--MNIL PYVVCVCSGV VRKEHPDTSI SKRGRTWLAK I-DFLCPPAF NQGEH---RA EQKPAFKFKH PCCGPWEFVI VNYQTSAKDI ILLLYIKDV- GCNCVLELLK WQLCTSH-GD WQSLVADSCV WA-HNVAVCE WKRELVPGL- --NHSCERLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR -SGVDDNYQ- VNMLPFKHSK HNPVGHYVQG LED-ALNLIR PGT---ARAL NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMAYK YEHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIHGQPGVGH IFV-GNDARR HLIAGVSFMA VVKFMCEACV PFFRKLFMAV GQMRYDYVNN YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC -PPCILTVSG LKETR-AGII DGTEITKTPD IGIC--MWDT IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHDQNT LVD-ELYSDV ---YPAAPKK K--VCM-GVG DVGGYEVMGE DLFQILVCE t70 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKEDSGHV KQVRNASEDI M--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSTEESISF MYLQHHSFSD KRCLNYLNVG DNEDSKHAKE KFLRG----D G---GIGQ-H -VIINVWNAV MGRLNHHEPP PEVVFPLRQ- -GGPDSKPM- ---------- -------VDG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHKFG 
LAFHTLSHDP SKLHRTFAIN QKMGANVYQQ MSRMKTPAPF YPHVMEYAHG V----CKPFY EHSEPKNEAQ GNGEKVPMNV PCVESMDCQY ENKHAPMAKL MHQDSLYLMR MHTMSKPLMG DVNGNRCRAD LTSLKMLQLD IGFCAKTLGQ -NIVVPKLLG HRTWLATAPL Q------IGC GMMIFGNKIG STQ-EFEAAA DPTVIYFYRN RIVRRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTVAKDI ILLLYIKDV- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- --NHSCEHLA KHIYFQPDGE -------DEG KMILKIFGLD WCEVERSHQR -SGVDDNYK- VNMLPFSHSK HNPVGHYVQG LGD-ALRLIR PGT---ARAL NILF-YGCQY CSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAILAHK YKHKGMDWDT GKE-MEQGPK -VVNYNLLFY -------AEP NIHGQPRVGH IFV-GNDAHR KLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN YDIKIWETAF RGGVYAIENS WETLVLC-EM TSGRSGAKMN HLPRLFEQVC -PPCLLTVSG LKETV-AGII DGTEITKTPE IGIC--MWDT IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHNQNT LVD-ELYSDV ---YPAAPKK Q--HYM-GVG DVGGYEVMGE DLFQILVCE t91 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKEDSGHV KQVRNASEDI M--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSTEESISF MYLQHHSFSD KRCLNYLNVG DNEDSKHAKE KFLRG----D G---GIGQ-H -VIINVWNAV MGRLNHHEPP PEVVFPLRQ- -GGPDSKPM- ---------- -------VDG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHKFG LAFHTLSHDP SKLHRTFAIN QKMGANVYQQ MSRMKTPAPF YPHVMEYAHG V----CKPFY EHSEPKNEAQ GNGEKVPMNV PCVESMDCQY ENKHAPMAKL MHQ-SLYLMR MHTMSKPLMG DVNGNRCRAD LTSLKMLQLD IGFCAKTLGQ -NIVVPKLLG HRTWLATAPL Q------IGC GMMIFGNKIG STQ-EFEAAA DPTVIYFYRN RIVRRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTVAKDI ILLLYIKDV- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- --NHSCEHLA KHIYFQPDGE -------DEG KMILKIFGLD WCEVERSHQR -SGVDDNYK- VNMLPFSHSK HNPVGHYVQG LGD-ALRLIR PGT---ARAL NILF-YGCQY CSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAILAHK YKHKGMDWDT GKE-MEQGPK -VVNYNLLFY -------AEP NIHGQPRVGH 
IFV-GNDAHR KLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN YDIKIWETAF RGGVYAIENS WETLVLC-EM TSGRSGAKMN HLPRLFEQVC -PPCLLTVSG LKETV-AGII DGTEITKTPE IGIC--MWDT IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHNQNT LVD-ELYSDV ---YPAAPKK Q--HYM-GVG DVGGYEVMGE DLFQILVCE t49 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKENSGHV KQVRNASEDI M--YNHFTGN KGACELIKG- YV-CMEGHDC VNTMANIS-S QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSNEESISF MYLQHHSFSD KRCLNCLNVG DNEDSKHLKE KFLRA----D G---GIGQ-H -VIINMWNAV MGRLNHHEPP PEVVYPLRQ- -GGPNSKPM- ---------- -------VDG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHRFG LAFHTLKHDP SKLHRTFAIN EKMGANVYQQ NSRMKASAPF YPHVMEYAHG V----CKPFY EHSEPKNDAQ GNGDKVPMNV PCVESMDCQY ENKHAPMAKL MHQ-SLYLMR MYTMSKPIMG DVNGNRCRAE LTSLKMLQLD IGFSAKTLGQ -NIVPPKLLG HRTWLATSPL Q------IGC GVMIFGNKIG STQ-EFESAA DPTVIYFYRN RIVKRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTMAKDI ILLLYIKDV- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- --NHSCEHLA KHIYFQPDGE -------DEG KMIIKIFGLD WCEVEKSHQR -SGVDDNYK- VNMLPFSHSN HNPVGHYVQG LGD-ALRLIR PGT---ARAL NILF-YGCEY YSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAIMAHK YEHKGMDWDT GKE-MEQGPK -VVHHNILFY -------AEP NIHGQPRVGH IFV-GNDAVR NLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN YDIKIWETAF RGGVYAMENS WETLVLC-EM TSGRVGARMN HLPRLFEQVC -PPCILTVSG LKETV-AGVI DGTEITKTPE IGIC--MWDT IHFKNPVMQY CCYYKEPRSV R--------- --VNNTGGAE LPK------- ---------- ----YHNFNA LVD-ELYSDV ---YPAAPKK K--HYM-GVG DVGGYEVMGE DLFQILVCE t52 MIWSEERQMV REK--VDNAV DNQ-C----- -------LIA ARVKENCGHV KQVRNASEDI L--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMAHDS-S QSATWKFDRF MCDLMHLMQG --------GD L--------- ---------- -CSY--PSFL PVCMFVAQVI NKGE--LVLP DSSNEESISF MYLQHHSFSE KRCLNYLNVG DNEDSKHLKE KFLRA----D G---NIGQ-H -VIINMWNAV MGRLNHHEPP AEVNFPLRQ- -GGPDSKPM- ---------- -------VDG FFDK-D-D-- ----DSPGIA NAMNVPCG-- --DQWGGPIR 
GWCSSQHKFG LASHTLSHDP SKLHRTFGIN KNMGANVYQQ TSRMKANAPF YPHVMEYAHG V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIECQY ENKHATMAKL MHD-SLYLMR MHTMSKPVMG DVNGHRCRAD LTALKMLQLD IGFSAKTLGQ -NIVAPKLLG VRTWLATSPL Q------IGC GVMIFGNKIG STQ-EFEAAA DPTVIYFYRN RIIRRVADVI STVRMKSPQE MRE------Q DVDSHSLGDD DNQA--MTIL PHWVCVVSGV VRKEHPDTSI NKRGSTWLAK V-DFLCPPAF NQGEH---HA EQKPVFGFKH PHVGPWEFVI CNYQTSAHDI ILLLYIKDV- GCNCVLELLV WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- --NHSCERLA KHIYFQPDGE -------DEG KMILKIFELN WCEVEKSHQH -SGVDDNYQ- VNMLPFDHCK HNPVGHYVQG LED-ALGLIR PGT---ARAL NILF-YGCQY YSGEFQDSEA -ERSWIYN-V YCEI----KK DHKSGIMAHK YDHKGVDWDT GKE-MEQGPK -VLHHNLLFY -------AEP NIHGQPTVGH IFV-GNDARR KLIAGVSFMS VVKFMCEACV PFFRKLFMAF RQMRYDYVNN YDIKIWETVF RGGVYAIENS WETLILC-EM TSGRCGAKMN HMPRLFEQVC -PPCILTVSG LKETM-AGVI DGTEITKTPD IGIG--MWDS IHFKNPVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNS LVD-ELYSDV ---YPAAPKK K--HYM-GVG DVGGYEVMGE DLFQILVCE t43 MIWSEVRQMV REG--SDNAI DNR-C----- -------LIA ARVKHVQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCALMHLMQK --------GD L--------- ---------- -CSH--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGH-H -VIINVWNAF MGRLNHHEPP AEVVYPLRK- -GGPDSK-M- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVNFG LANHTLVQ-P SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKVVQLD IGFSSKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGV DVMVMGNKIG SPS-EFEVAA DPTIIWFYRN CIVHKLADTV STAKMKSPQE MRH------Q DVDSPSLGDD DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF NQGEH---ER EQRPVYGFKH PGCRPWQFVI ANYQTSAKNI IMLLYVKDV- SCNGVLQLLN WHLCDSH-GD WQSLVADSCC WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFLHSR QNPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVIAHK HEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP 
NIHGQPGVSH VFM-GNDATR KLITGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYNVN YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCVLTVSG LKQPM-AGYN DQTEITKTPD ICIC--TWGT IHFKNFVMQY CVYYKENRSV R--------- --VNNTGGAE LPQ------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t1 MIWSEVRQMM REG--TDNAI DNQ-C----- -------LIA ARVKHMQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCCLMHLMQN --------GD L--------- ---------- -CSH--PSFL GMCMFVEQVR CKGE--LVLP DSPNEESLSF VYIQHHSFSD KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGR-H -VIINVWNAF MGRLNHHEPP AEVVYPLRK- -GGPDSKPV- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG LANHTLVQ-P SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAK LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG RHTWLATSPL Q------IGC DMMVMGNKIG SPS-EFEVAA DPTIIWFYRD CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DQQE--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF NQGEH---EA EQRPVYGFVH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFMQSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLITGVSVMP TYKFMCGACV QFFHKLFMAV RNMRYDYTVN YDIKIWETHI RQGVYAVENS WETLITC-EM TSGRIGAKIN HLPRLPEQVV -PPCILTVSG LKQPM-AGYN DETQITKTPD ICIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t86 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKPVQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCALMHLMQN --------GD L--------- ---------- -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGR-H -VIINVWNAF MGRLNHHEPP AEAVYPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVSVPCA-- 
--DQVGGPIR GWCSSQVKFG LANHTLVQ-P SKLHRTFKIN KKMGANVYHQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPS-EFEVAA DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF NQGEH---GA EQRPVYGFQH PRCRPWQFVI ANYQTFAKNI IMLLYVKDV- SCNRVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFIHSR DNPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDGEA -ERSWIYP-V YCEM----KK DHKSAVVAHK AEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLITGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPM-AGYN DETQITKTPD ICIC--GVGT IHFKNLVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t72 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHVQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKVS-S QSAVWKFDRF MCALMHLMQN --------GD L--------- ---------- -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNCLNAG DNDDSEHLKE KHLRT----Y G---QIGR-H -VIINVWNAF MGRLNHHEPP AEVVYPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----HRPGIA NAVSVPCA-- --DQVGGPLR GWCSSQVKFG LANHTLVQ-P SKLHRTFKIN KKIGANVYQQ TSRMKADAPV YPHIMEYAHG V----YKPFY ERSEPKNEAQ GNGEK-QGNV PCVESVECQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPS-EFEVAA DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRH------Q DVDSPSLGDD DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF KQGEH---EA EQRPVYGFQH PRCRPWQFVI ANYQSSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG QMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFIHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN 
-------AEP NIHGQPGVSH VFM-GNDATR KLITGFSIMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPV-AGYN DETQITKTPD ICIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t96 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHVQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCALMHLMQV --------GD L--------- ---------- -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNCLNVG DNDDSEHLKE KHLRV----Y G---QIGR-H -VIINVWNAF MGRLNHHEPP AEVVYPLRK- -GGPDSKLM- ---------- -------VDG FFDK-E-D-- ----ERPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG LANHTLVQ-P SKLHRTFKIN KKIGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DVMVIGNKIG SPS-EFEIAA DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF NQGEH---EA EQRPVYGFQH PRCRPWQFVI GNYQTSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCM WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFIRSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDAEA -ERSWIYD-V YCEM----KK DHKSAVYAHK VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLISGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN YDVKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPM-AGYN METQITKTPD ICIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDV ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHG t44 MIWSEVRQMV REG--TDNAI DTR-C----- -------LIA ARVKHMQGHV KQVRNANEDA L--YQNFSGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSH--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNYLNVG DNDDSEHLKE KHLRI----Y G---VIGK-H -VIINVWNAF MGRLNHHEPP ADVEYPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----DRPGIA 
NAVSVPCG-- --DQVGGPIR GWCSSQVKFG LANHTLAQNP SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY DRSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG YVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DIMVMGNKIG SPS-EFEVAA DPTIIWFYRN CIVHKLADVV STAKMKSPQE MRV------Q DVDSPSLGDD DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF EQGEH---EA EQCPVHGFKH PRCRPWQFVI ADYQTSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFMHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCEY YSGQFPDCEA -ERSWIYD-V YCEM----KK DHKSAVVAHK HEHKGMDWDT GKE-MNQGPK -VVMHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLITGVSVMP TYKFMCNACV PFFHKLFMAV RNMRYDYTVN YDIKIWETHM RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPM-AGYN DETEITKTPD ICIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNM LVD-ELYSDM ---YPAAPKK K--VYV-GAG DVGGYEVMSE DLFQILVHA t45 MIWSEVRQMV REG--TDNAI DNC-C----- -------LIA ARVKHMQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YK-CMEGHDC VNTMAKSS-S QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSV--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNYLNVG DNDDSEHLKE KHLRI----Y G---EIGK-H -VIINVWNAF MGRLNHHEPP AEVMYPLRK- -GGPESKPM- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVSVPCT-- --DQVGGPIR GWCSSQVKFG LANHTLVHNP SKLHRTFAIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPA-EFEVAA DPTIIWFYRN CIVHKLADLV STAKMKSPQE MRV------Q DVDSPSLGDD DQEA--MDIL PYWVCVMSGV VRKEQPD-SV NKRGRTWLAK K-DFLVPPAF NQGEH---EA EQKPVYGFDH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVHKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFFHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCQY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK HQHKGMDWDT GKE-MNQGPK 
-VVLHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLIPGVSVMH DYKFMCGACV PFFHKLFMAV RNMRYDYTVN YDIKIWETHL RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPM-AGFV DETEITKTPD ICIC--TWGT IHFKNSVVQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t74 MIWSEVRQMV REG--TDNAI DNC-C----- -------LIA ARVKHMQGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSV--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD KRCLNYLNVG DNDDSEHIKE KHLRI----Y G---EIGK-H -VIINVWNAF MGRLNHHEPP AEVMYPLRK- -GGPESKPM- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG LANHTLVHNP SKLHRTFAIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ LNIIDCKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPA-GFEVAA DPTIIWFYRN CIVHKLADLV STAKMKSPQE MRV------Q DVDSPSLGDD DQEA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK K-DFLVPPAF NQGEH---EA EQKPVYGFDH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- --NHSCEMLA KTVYFEPDGE -------DEG KMVHKIFELD WVEMEKSHQQ -SSVDDNYL- VNMLPFFHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL NILF-YGCQY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK HQHKGMDWDT GKE-MNQGPK -VVLHGVLFN -------AEP NIHGQPGVSH VFM-GNDATR KLIPGVSVMH DYKFMCGACV PFFHKLFMAV RNMRYDYTVN YDIKIWETHL RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQPM-AGFV DETEITKTPD ICIC--TWGT IHFKNSVVQY CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE DLFQILVHE t26 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHV KQVRNANEDA L--YQNFTGK KGAREKIKR- YL-CMEGHEC VVTMAKNS-S ISAMWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSF--PSFL GMCMFHDQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD KRCLNYLNVG DNEDSEHLKE KHLRV----Y G---NIGK-H -VIINSWNAM MGRLNHHEPA AEVVYPLRK- -GGPESKPM- ---------- -------VDG FFDK-E-D-- 
----DRPGIA NAVSVPCD-- --DQWGGPVR GWCSSQHKFG LADHTLIHIP SKLHRTFAIN KKMGANVYQQ TSRMKAAAPV YSHVMEYAHK V----VKPFY ARSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL MHP-PLYLMR MQTMAKPPMG DVDGHRCRAG LTALKIVQLD IGFSQKTLGQ -NIIDSKHLG HRTWLATSPL Q------IGC SAMVMGNKIG SPQ-EVELAA DPTVIYFYRN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DQQP--MNII PYWVCVTSGV VHKEQPD-SV NKRGRTWTAK N-DFLCPDAF NQGEH---GA EQKPTHGFEH PRCRPWQFVI PVYQTGAKNI ILLLYIKDV- SVNGVLQLLI WQLCDSH-GD WQSLVADSCA WA-VNVAVCG WKRELVPGL- --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEIEKSHQQ -SSVDDNYL- VNMLPFTHSR ETPVGHYVQG LED-PMHLMR PGT---ARAL NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKNAIVAVK HEHKGMDWDT GKE-MNHGPK -VIVHGVLFH -------AEP NIGGQPGVSH VFV-GNDATR KLIAGVSVMP IVKFMCGACV PFFRKLFMAL RNMRYDYASN YDIKIVETHL REGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQSV-SGYN DETEITKTPD IKIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --LNNTGGAE LPK------- ---------- ----YHHQNV LVD-ELYGEA ---YPAAPKK K--HYM-GAG DVGGYEVMSE DLFQILVHE t61 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHV KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHEC VNTMAKNS-S VSAVWKFDQF MCDLMHLMQN --------GD L--------- ---------- -CSF--PSFL GMCMFHEQVR CKGE--LVLS DSKNEESVSF VYIQHPSFSD KRCLNYLNVG DNEDSEHLKE KVLRV----Y G---DIGK-H -VIINVWNAM MGRLNHHEPA AEVVCPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----DRPGVA NAVSVPCD-- --DQWGGPVR GWCSSQHKFG LADHTLVHIP SKLHRTFAIT KKMGANVYQQ TSRMKATAPV YPHVMEYAHG V----VKPFY DRSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL MHP-PLYLVK MQTMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSSKTLGQ -NIIHSKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPQ-EVELAA DPSVIYFYQN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DQQP--MNII PYWVCVTSGV VRKEQPD-SV NKRGHTWTAK N-DFLCPPAY NQGEH---GA EQKPVHGFEH PRCRPWQFVI SVYRTGAKNI NLLLYIKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCA WA-VNVAVCG WKRELVPGL- --NHACEMLA KSVYFEPDGE -------DEG KMVLKIFGLD WCEIEKSHQQ -SSVDDNYL- VNMLPFAHSR QTPVGHYVQG LED-PMHLMR PGT---ARAL NILF-YGCEY YSGHFPDGEA -ERSWIYD-M YCEL----KK DHKSAIVAVK HCHKGMDWDT 
GKE-MNHGPK -VIIHGLLFH -------AEP NIGGQPGVSH VFV-GNDATR KLIAGASVMF IVKFMCGACV PFFRKLFMAL RNMRYDYSQN YDIKIVETHL REGVYAIENS WETLVVC-EM TSGRMGAKIN HLPRLPEQVI -PPCILTVSG LKRAV-NGYN DETEITKTPD IKIC--TWGT IHFKNSVMQY CCYYKENRSV R--------- --FNNTGGAE LPK------- ---------- ----YHHQNV LVD-ELYSEL ---YPAAPKK A--HYM-GAG DIGGYEVMSE DLFQILVHE t97 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHM KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHEC VNTMAKNS-S VSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- -CSF--PSFL GMCMFHQQVR CKGE--LVLP DSKNEESVSF VYIQHHSFND KRCLNYLNVG DNEDSEHLEE KHLRV----Y G---NIGK-H -VIINVWNAM MGRLNHHEPA AEVVCPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----DRPGVA NAMSVPCD-- --DQWGGPVR GWCSSQHKFG LADHTLVHVP SKLHRTFPIN KKMGANVYQQ TSRMKATAPV YPHVMEYAHG V----IKPFY DRSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL MHP-PLYLVK MQTMSKPPMG DVDGHRCRAG LTALQIVQLD IGFSAKTLGQ -NIIHSKLLG HRTWLATSPL Q------IGC DVVVMGNKIG SPQ-EVELAA DPSVIYFYQN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DQQP--MNII PYWVCVTSGV VRKEQPD-SV NKRGRTWSAK N-DFLCPPAF NQGEH---GA EQKPVHEFQH PRCRPWQFVI SVYRTGAKNI NLLLYIKDV- SCNGVLQLLN WQLCDSH-GD WQSLVADSCA WA-VNLAVCG WKRELVPGL- --NHACEMLA KSVYFEPDGE -------DEG KMNLKIFGLD WCEIEKSHQQ -SSVDDNYL- VNMLPFTHSR ETPVGHYVQD LED-PMHLMR PGT---ARAL NILF-YGCEY YSGHFPDGEP -ERSWIYD-M YCEL----KK DHKSAIVAVK HCHKGMDWDT GKE-MNHGPK -VIIHGVLFH -------AEP NIGGQPGVSH VFV-GNDATR KLIAGVSVMP IVKFMCGACV PFFRKLFMAL RNMRYDYSQN YDIKIVETHL REGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI -PPCILTVSG LKQAV-NDCN DETEITKTPD IKIC--TWPT IHFKNGMMQY CCYYKENRSV R--------- --FNNTGGAE LPK------- ---------- ----YHHQNV LVD-ELYSEV ---YPAAPKK A--FYM-GAG DIGGYEVMSE DLFQILVHK t17 MIWSEVRQMV REG--ADNAI DNR-C----- -------LIA ARVKHFSGHT KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGQDC VNTMAKSS-S QSAVWKFDRF MCDLVHLMQN ---GARGSGD L--------- ---------- -CSF--PSFL GMCMFHEQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD KRCLNELNVG DNEDSIHLKE KHLRI----Y G---NIGK-V -VIINVWNAM MGRLNHHEPA AEVVYPLRK- -GGPDSKPM- ---------- -------VDG 
FFDK-E-D-- ----DRPGIA NAVAVPCS-- --DQWGGPVR GWCSSQHKFG LADHTLVHVP SLLHRTFAIN RKMGANVYQQ TSRMKAAAPV YPHMMEYAHG V----VKPFY ERSEPKNEAQ GNGEK-QWNV PCVQSVDCQY ENKHATMAKL MHP-PLYLMR METMSKPPMG DVDGVRCRAG LTALKIVQLD IGFSAKTLGQ -NIIDSKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPQ-EFELAA DPTVIYFYRN CIVQKLADVV STVKMKSPQE MRV------Q DVDSPSLGDD NQQS--MNII PYWVCVHSGV VQKEQPD-SV HKRGRTWTAK N-DFLCPPAF DQGEH---GA EQKPVHGFEH PRCRPWQFVI ANYQTGAKNI ILLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLSADSCA WA-HNVAVCG WKRELVPGL- --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEMEKAHQQ -SSVDDNYL- VNMLPFTHSR ENPVGHYVQG LED-PMHLIR PGT---ARAL NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKSAIVAVK HEHKGMDWDT GKE-MNQGPK -VIIHGVLFH -------AEV NIHGQPGVSH VFV-GNDATR KLIAGVSVMP FVKFMCGACV PFFRKLFMAL RNMRYDYTSN YDIKIIETHL RNGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI -PPCMLTVSG LKQAM-AGVN DETEITKTPD IKIC--TWGT IHFKNFVMQY CVYYKENRSV R--------- --FNNTGGAE LPK------- ---------- ----YHHQNA LVD-ELYSDV ---YPAAPKK K--HYM-GEG DVGGYEVMSE DLFQILVHA t11 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHFSGHT KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGQDC VNTMAKSS-S QSAVWKFDRF MCDLVHLMQN ---GARGSGD L--------- ---------- -CSF--PSFL GMCMFHEQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD KRCLNSLNVG DNEDSIHLKE KHLRI----Y G---DIGK-H -VIINVWNAM MGRLNHHEPA AEVVYPLRK- -GGPDSKPM- ---------- -------VDG FFDK-E-D-- ----DRPGIA NAVAVPCS-- --DQWGGPVR GWCSSQHKFG LADHTLVHVP SLLHRTFAIN KKMGANVYQQ TSRMKAAAPV YPHMMEYAHG V----VKPFY ERSEPKNEAQ GNGEK-QWNV PCVQSVDCQY ENKHATMAKL MHP-PLYLMR METMSKPPMG DVDGVRCRAG LTALKIVQLD IGFSAKTLGQ -NIIDSKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPQ-EFELAA DPTVIYFYRN CIVQKLADVV STVKMKSPQE MRV------Q DVDSPSLGDD NQQS--MNII PYWVCVHSGV VQKEQPD-SV HKRGRTWTAK N-DFLCPPAF DQGEH---GA EQKPVHGFEH PRCRPWQFVI ANYQTGAKNI ILLLYVKDV- SCNGVLQLLN WQLCDSH-GD WQSLSADSCA WA-HNVAVCG WKRELVPGL- --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEMEKAHQQ -SSVDDNYL- VNMLPFTHSR ENPVGHYVQG LED-PMHLIR PGT---ARAL NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKSAIVAVK 
HEHKGMDWDT GKE-MNQGPK -VIIHGILFH -------AEV NIHGQPGVSH VFV-GNDATR KLIAGVSVMP FVKFMCGACV PFFRKLFMAL RNMRYDYTSN YDIKIMETHL RNGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI -PPCMLTVSG LKQAM-AGVN DETEITKTPD IKIC--TWGT IHFKNFVMQY CVYYKENRSV R--------- --FNNTGGAE LPK------- ---------- ----YHHQNV LVD-ELYSDG ---YPAAPKK K--HYM-GEG DVGGYEVMSE DLFQILVHA t23 MIWSECQRML REH--EDQAI ANE-C----- -------LIA FKVKLVNGHV KQVRNASEDA H--YTFFAGE KGARELIKR- YS-CMEGDGC VNTMAEGR-S ESAVWKFDPF MCQLTHSMQS --------GD L--------- ---------- -CSV--PSFL GVCMFQRQVV MKGE--LVLP DSADEGSVSF MYIQHHSFAA KRCLNVLNVG DNEDSVHLRH KQLRV----Y G---KIGK-H -VIISVPNAI MGRLNHHEPS AIVVFPLRQ- -RGADSKSV- ---------- -------GDG FYDK-D-E-- ----DRPGIA NAVSVPVA-- --EQWVGSVR GWYSSEHKYG LANHILI-GP SKLHRTYQTT AKMGANVVKA TNRMKRPQPV YPHVMEYANG V----VKPFY EVAESKNEAQ GNGEK-PVNV PCVESPDCQY ESKHARVNKL MHP-SLYLMC MEAMNKPIMG DWDGNRCRSP LCLLKVIQLD MGVSGKTLGQ -NIVMAKLLG ERTWLATSPL Q------IGC DVVAVGKKPE SPQ-EFECAA DPTVIYFYKN LIIQQAADYV SAVQVKSPQE MRY------Q DVNSPSDGDE NGQS--MHIG PYWVCVSSEV VKKSQPD-SV DKRGRTWVAK N-EFLCPPDH MQGEHSIEGA EQKPFFGFAG PFPQPWQFVI VNPQTWAHNI IRLLYCKDV- SCNCVLTCLN WVLCDSH-GD WHSLIADACP CA-HNVAVCG WKRELVPGL- --NHSNEHMA KTIYFEPDGH -------DEG KMILNIFGLD WCEVETSHQE -SSSDDNHL- VNMLPFGVSR DDPVGHYMLG LED-AIRLYR PGT---ARAL NILF---VNY YSGDFQDPEL -ERSQLYN-V YCEQ----KQ DHRNAIRASK NDHKGMAWDT AKD-MEQGPK -MVEHQVLFY -------AEP NMHGQPEVIH IFI-GNDAMC MLIPGMSVMV HHKFMRAACM PFFNKLFMAV RYMRYDYVDN YDIKIDETRY RDGVYARRNS WETANVC-EM SSGRSGAKIN HNPRLPEQVT -PPRSLTVSG LKDTM-PGHP DVTEITKTPV IAIV--MWGT INLKNHVMQY CCYYKETRSV R--------- --VNNTGGAE LPK------- ---------- ----YHYQNI LVD-ELYSND ---YPAAPKK A--QFM-GVG DVGGYEIVCE DIFQILVCD t76 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT GRIHNAVEGG I--VTNFHGF ---AEMPKN- HD-VMEEQGG PGTCLAPQQS ADTSWPFVMF TVDLAHRCRG --------GD MPQNTGDSMT CPHLAEGLAA VNS---RAFI GVSIYDLKVS HKMI--QACN PIQN------ ---DNHAFSD QRCLNDLSQG VVGASFPKMQ HGVRH----Y K---K----- ---RVYENEV GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV 
CSLLVLQPAR NFIQLS-MVR DTPHRLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV LNGHMLK-TQ NTLHRQYLVD VELQATMVFV AARMKTDSSM YMSVREILHG ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK IHP------- ---LCKRCME NGRCLRYKND LTGDQLVQLA D----NTCAW -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED ALDLYTHKPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKH---- ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD DLKDPISTEV IADKLHEWPT NVN-KENSAD ----HRQLFH -------ANS SALDKHQHNV VMN-GSPCIR ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA MNITIQCLHT GEGVMCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPVGATHLTH VCVCVPHPWT KGKSEILQNM GSA-VLYNDV IRDHNASEQK E--AHV-PMG DEGRISRAKD EIMYIRDLE t53 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT GRIHNAVEGG I--VTNFHGF ---AEMPKN- YD-VMEEQGG PGTCLAPQQS ADTSWPFVMF TVDLAHRCRG --------GD IPQNTGDSMT CPHLAEGLAA VNS---RAFI GVSTYDLKVS HKMI--QACN PIQN------ ---DNHAFSD QRCLNDLAQG VVGASHPKMQ HGVRH----Y K---K----- ---RVYENEV GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV CSLLVLQPAR NFIQLS-MVR DTPHGLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV LNGHMLK-TQ NTLHRQYLVD VELGATMVFV AARMKTDLSM YMSVREILHG ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK IHP------- ---LCKRCME NGRCLRYKND LAGDQLVQLA D----NTCAW -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED ALDLYTHRPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKP---- ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD 
DLKDPISTEV IADKLHEWPT NQN-KENSAD ----HRQLFH -------ANS SALDKHQHNV VMN-GSPCIR ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA MNITIQCLHT GEGVVCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPMGATHLTH VCVCVPHPWT KGKSEILQNM GSA-VLYNDV IRDHNASEQK E--AHV-PMG DQGRISRAKD EIMYIRDLE t16 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT GRIHNAVEGG I--VTNFHGF ---AEMPKN- YD-VMEEQGG PGTCLAPQQS ADTSWPFVMF TVDLAHRCRG --------GD IPQNTGDSMT CPHLAEGLAA VNS---RAFI GVSTYDLKVS HKMI--QACN PIQN------ ---DNHAFSD QRCLNDLAQG VVGASHPKMQ HGVRH----Y K---K----- ---RVYENEV GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV CSLLVLQPAR NFIQLS-MVR DTPHGLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV LNGHMLK-TQ NTLHRQYLVD VELGATMVFV AARMKTDLSM YMSVREILHG ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK IHP------- ---LCKRCME NGRCLRYKND LAGDQLVQLA D----NTCLW -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED ALDLYTHKPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKP---- ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD DLKDPISTEV IADKLHEWPT NQN-KENSAD ----HRQLFH -------ANS SALDKHQHNV VMN-GSPCIR ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA MNITIQCLHT GEGVVCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPMGATHLTH VCVCVPHPWT KGKSEILQNM GSA-VLYNDV IRDYNASEQK E--AHV-PMG DQGRISRAKD EIMYIRDLE t42 LSSSHEMRVV HHKSKPDEEI ADQ-D----- -------LCA EEMCKGSGHT GRIHSAVEGG I--WTNFHGF ---AEMPKN- YD-VMEEQGG PGTCIAPRQS ADTSWPFMVF TVDLAHRCRG --------GD TPTQAGDSMT FPHLAEGLPA VNS---RAFV GVSSYDLRVS HKMI--QACT LIQN------ ---DNVAFSD QRPLNDLNQG CVGTSHPKMP HGVRH----Y K---K----- ---KVYENEV GDRLDDAEGI DDVGVWMRW- -NGVKHAWCH 
IIGSCPDHLV CSLLVLQPGR NFIQLS-VVH DTPHRLPKMA SAAAVFKQTC HLEQVPGTAR MWGKSGPRVR LNGHVLK-NQ NILHRQYLVD VNLGATMVFV LARMKTDASM YMSHREILHG GTFDAAKPFR QICGTKDEAQ PDGRR-PATL LVVLSKDNQG ITEHGAVLKH VHP------- ---LCKKDCN NNRCLRCKNV LAGNQLIQLS D----VTCAW -NLFEC--LG DSESLFGSPL AIKVDEDRGF TAP------- SK-------V EPKCAQFYSK SCTHH----- ---------- -MM------C SVGSNASEED ALELYTHKPV PDPQCFVSIV VRNIPEH-SP CK-------- ---------- ---------- -P---CS--- ---------- ----VEEVAY CKNCDSKDVR AVQNAYQDLA VPLSKLK-AP WLSMGHCECW EEDINNILSM VKHELVVDQD MVNRSPEVAA KMAYFGPDGF HWDVELCEES DLTVDDFELG W-VLKP---- ---EDDHPL- ICIGSFSVHK QFSVMVYPLP MND-AIRMSQ PCHAAHAQDI HTDQ---DRY DVRTFLGHSI -KCHMCNQ-A CHEENIVWRD DLKDPITTEV IAEKLQEWPT NQN-KENVAD ----HRQLFH -------ANS SALDKHRHNV MMN-GSACIR TLIIGGSIVG VNVFMMEPCQ EFFTDLFMVY EGLQYAVVCA VNIIIQCLHT NEGVVCNVKC KEFLQREEDM KSGLIGIICN HISRMCMHIL -VVCLLTHCG LKAME-IPFD I-TGVVRGPL ITEP--LYLI I-YVNAVEPY TDAYKKPKSM HQFQFDDVQI RYRTNTGFEE NPKLATHLVH VCVVVPHPWT KGKSEILQNM ESA-HLYNAV IRDNNASEQK E--AHV-PVG DEGRISRAKN EILYIRDLE t24 QSSSHEMRVV HHKSKPDEEI TDQ-D----- -------LCA EEMCKGSGHT GRIHSAVEGG I--WTNFHGF ---AEMPKN- YD-VMEEQGG PGTCVAPRQS ADTNWPFMVF PVELAHRCRG --------GD TP-------- FPHLAEGLPA VNS---RAFI GVSSYDLKVS HKMI--QACN PIQN------ ---DNVAFSD QRVLNDLSQG CVGTSHPKMP HGVRH----Y K---K----- ---RVYENEV GDRLDDAEAV DDVGVWMRW- -AGVKVAVCH VIGSCPDHLV VSLLVLQPAR NFIQLS-VVH DTPHRLPKMA SAAAVFKQTC HLEQVAGTAR VWGKSGPRVK LNGHVLK-NQ NVLHRQYLVD VDLGATMVFV AARMKTDASM YMSHREILHG GTFDAAKPFR QICGVKDEAQ PDGQR-PATL LVVLSKDNVG ITEHGAVLKH VHP------- ---LCKKDCN NNRCLRCKNV LAGNQLVQLA D----ITCAW -NLFEC--LG DSESLFGSPL AIKVDEDRGF TAP------- SK-------V EPKCQQFYSK SCTHC----- ---------- -VM------C SVGSNASEED ALELYTHKPV PDPQCFVSIV VRNIPEH-SP CK-------- ---------- ---------- -P---CS--- ---------- ----VEEVCY CKNCDSKDVR ATQNAYQELA IPLSKLK-AP WLSMGHCECW EEGINNILSM VKHELVVDQD MVNRSPEVAA KMAYFGPDGF HWDIELCEEN DLTCDDFELG W-VLKP---- ---EDDHPL- ICIGSFSVHK QFSVMVYPLP MND-AIRMSQ PCYAAHAQDI HTDQ---DRY DIRTFLEHSI -RCHLCNQ-A 
CHEENIVWRD DLKDPITTEV IADKLQEWPT NQN-AENVAD ----HRQLFH -------ANS SCLDRHRHNV MMN-GSSCIR TLIIGGSIVG VNVFMMGPCQ EFFTDLFMVY EGLQYAVVCA INIIIQVLHT NEGVVCNVKC KEFLQREDDM KSGLIGIICN HISRMCMHIL -VHCLLTHCG LKAME-IPFD R-TGVVRGPL IAEP--LYLI I-YVNAVEPY TDAYKKPKSM HQFQFDDVQI RYRTNTGFEE NPKLATHLVH VCVVVPHPWT KGKSEILQNM ESARHLYNAV IRDNNASHQK E--AHV-PAG DQGRISRAKN EILYIRDLE bpp-seq-2.1.0/test/example.aln000644 000000 000000 00000462516 12147656566 016205 0ustar00rootroot000000 000000 CLUSTAL W (1.8) multiple sequence alignment t73 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDA t66 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPCSGNGTQVRNDVEDA t32 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDA t75 LIVSEIRVMVRDE--VHKAMDEE-C------------LIARRVKPYSGNGNQIRNDIEDA t79 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDA t27 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDA t93 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDA t59 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDA t7 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKKFSGSQGQVRNAIEDA t78 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQGQVRNAIEDA t12 LIVSEVRVMVRDG--INIAIDEL-C------------LIANRVKAFSGHQNQVRNAMEDA t81 LIVSEVRHMVRDG--ANVAIDEL-C------------LIACRVKAFSGHGHQVRNAVEDA t21 LIVSEVRHMVRDG--ANIAIDEL-C------------LIACRVKAFSGHGNQVRNAVEDA t80 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMA t14 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMA t85 LIVSEVRHVVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMA t62 LIVSEVRVMVRDG--IHIAVDEI-C------------LIANRVKCMSGQGNQARNAMEMA t19 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKCVSGQGNQARNAMEMA t77 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKSMSGQGNQARNAMEMA t88 LIVSECRLIIRDG--NHDAIDEM-CCVANDLNNEIERLVASMVKSFRGHDSQARNNSECM t37 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t35 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t48 
LIVSDMRLIIREG--SDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t55 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t46 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t67 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t57 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECI t56 LIVSEWRLFIRDG--HDDAIDEM-CCEANELNNDIEKLVASMVKGFRGHDSQARNNAECI t13 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECA t5 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECA t38 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECA t33 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECA t100 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECA t34 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECA t84 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCDDQARNNHECA t92 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECA t3 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEQQARNNRECA t22 GIVSECRVIIRDQ--SDDAIDER-Y------------LVASVVKRLSGCENQARNNRECA t64 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECA t18 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECA t68 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECA t28 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECA t82 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECA t41 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECA t71 GIVSECRMIIRDQ--SDDAIDEMVV------------LVASMVKEMSGCENQARNNSECA t94 GIVSECRMIIRDQ--SDDAIDEMVC------------LVASMVKELSGCENQARNNRECA t99 GIVSEVRMIIRDE--SDDAIDEM-C------------LVASMVKALSGCENQARNNRECA t40 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECA t90 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECA t4 GIVSECRMIIRDE--PDDAIDEV-C------------LVASMVKELSGCENQARNNRECA t36 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYVLQVRNALEDA t87 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYMLQVRNALEDA t89 
MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDA t39 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDA t8 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDA t6 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDA t58 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDA t54 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDA t65 MIWSEYRQMVREG--NDQSIDNK-C------------LIADRVKMMSGVVRQVRNAPEDA t29 MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t98 MIWSEHRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t83 MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t95 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t20 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t69 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t2 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t10 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t31 MIWSEYRSMVREG--ADQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t15 MIWSEYRSMVREG--SDQSIDNH-C------------LIAARVKMMSGVVRQVRNAPEDA t63 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDA t50 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDA t25 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKMMSGVVRQVRNAPEDA t51 MIWSEYRSMVREG--SDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDA t9 MIWSECRQMVREK--TDNAIDNQ-C------------LIAARVKENSGHVKQVRNDSEDI t47 MIWSECRQMVREK--SDNAIDNQ-C------------QIAARVKRNSGHVKQVRNDSEDI t60 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDI t30 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDI t70 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDI t91 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDI t49 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKENSGHVKQVRNASEDI t52 MIWSEERQMVREK--VDNAVDNQ-C------------LIAARVKENCGHVKQVRNASEDI t43 MIWSEVRQMVREG--SDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDA t1 
MIWSEVRQMMREG--TDNAIDNQ-C------------LIAARVKHMQGHVKQVRNANEDA t86 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKPVQGHVKQVRNANEDA t72 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDA t96 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDA t44 MIWSEVRQMVREG--TDNAIDTR-C------------LIAARVKHMQGHVKQVRNANEDA t45 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDA t74 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDA t26 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDA t61 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDA t97 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHMKQVRNANEDA t17 MIWSEVRQMVREG--ADNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDA t11 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDA t23 MIWSECQRMLREH--EDQAIANE-C------------LIAFKVKLVNGHVKQVRNASEDA t76 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGG t53 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGG t16 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGG t42 LSSSHEMRVVHHKSKPDEEIADQ-D------------LCAEEMCKGSGHTGRIHSAVEGG t24 QSSSHEMRVVHHKSKPDEEITDQ-D------------LCAEEMCKGSGHTGRIHSAVEGG t73 N--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR t66 N--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR t32 N--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR t75 N--GQEFVGI---RELGKH-YD-CMDGYHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR t79 A--RPDFVGT---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW t27 A--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW t93 A--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW t59 A--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDVWMCQLPHGMQW t7 A--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW t78 A--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW t12 Q--RPDFVGI---RELGKQ-YQ-CMDGHGAVDTGAGRN-SESAVWIFDIWMCKLAHGMQW t81 A--RPDFIGI---RELGKP-Y--CMDGHGAVNTGAGHN-SESAVWIFDIWMYKLSHGMQW t21 
P--RPDFIGV---RELGKP-Y--CMDGHGAVNTGAGVN-SESAVWIFDIWMYKLSHGMQW t80 A--RQNFVGM---RELEKV-YE-CMDGQGAVNTEAGNN-SESAVWIFDIWMCKLTHGMQD t14 A--RQNFVGM---RELEKV-YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCKLTHGMQD t85 A--RQNFVGM---RELDKA-YQ-CMDGQKAVNTEAGNN-SESAVWIFDICMCKLTHGMQD t62 A--RQNFVGM---RELGKQ-YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCRLTHGMQD t19 A--RQNFVGM---RELGKQ-YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCRLTHGMQD t77 A--RQNFVGM---RELGKQ-YH-CMDGQGTVNTEAGNN-SESAVWIFDIWLCRLTHGMQD t88 R--SAPFIGV---RELFKR-YHKCVEGAGCVHTVAGTP-SDSPVWMFDQFMCQLTHSMVD t37 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t35 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t48 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t55 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t46 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t67 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t57 T--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN t56 I--AAPFIGV---RELFKR-YIKCVEGAGCVYTVA-AP-SESPVWMFDKFMCHLTHSMVV t13 I--PPPFHGV---REMFKRVYE-CMEGIGCVNTVAGNP-SESSAWMFDKVMYQLTHSMVW t5 I--PPPFHGV---REMFKRVYE-CMEGIGCVNTVAGNP-SESSAWMFDKVMYQLTHSMVW t38 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t33 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t100 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t34 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t84 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t92 I--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW t3 T--SIPFLGV---RELWKR-YE-CMEGIGCVNTVAGKP-SQSSVWMFDRFMYKLTHSMVW t22 I--SIPFLGV---RELWKR-YE-CMEGIGCVNTVAGTP-SESSVWMFDRFMYKLTHSMVW t64 IT-SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t18 IT-SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t68 I--SPPFIGV---RELFKR-YG-CVEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t28 I--SPPFIGV---RELFKR-YH-CMEGIGCVNTVAGMP-SESSVWMFDQFMYKLTHSMIW t82 
I--SPPFIGL---RELFKR-YV-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t41 I--SPPFIGL---RELFKR-YV-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t71 I--SPQFIGV---RELFKR-YR-CMEGIGCVNTVAGAP-SESSVWMFDRFMYKLTHSMVW t94 I--SPPFTGV---RELFKR-YR-CMEGIGCVNTVAGIP-SESSVWMFDRFMYRLTHSMVW t99 I--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-SESSIWMFDRFMYKLTHSMVW t40 I--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-SESSVWMFDRFMYKLTHSMVW t90 I--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-SESSVWMFDRFMYKLTHSMVW t4 I--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW t36 L--YVFFTGTKGTRELVKY-YM-CMEGHGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMEG t87 L--YVFFTGNKGTRELVKY-YM-CMEGHGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMEG t89 L--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG t39 L--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG t8 L--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG t6 L--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG t58 L--YNFFTGHKGTRELVKY-YM-CMEGEGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMQG t54 L--YNFFTGHKGTRELVKY-YM-CMEGEGCVHTMAKKS-SQSGIWKFNAFMC-LMHLMQG t65 L--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQS t29 L--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQS t98 L--YNFFTGQKGTKELVKF-YF-CMEGCGCVNTMAKKS-SQSAPWKFNRFMC-LMHLMQS t83 L--YNFFTGQKGTKELIKF-YF-CMEGCGTVNTMAKDS-SQSAPWKFNRFMC-LMHLMQS t95 L--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQL t20 L--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQL t69 L--YNFFTGHKGTRELVKY-YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMHLMQE t2 L--YNFFTGQKGTRELVKY-YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMYLMQD t10 L--YNFFTGQKGTRELVKY-YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMYLMQD t31 L--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKQS-SQSAPWKFNRFMC-LMHLMQN t15 L--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN t63 L--YTFFTGQKGKRELIKY-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQD t50 L--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-SQSAPWKFNHFMC-LMHLMQN t25 L--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN t51 
L--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN t9 L--YNHFTGKKGARELIKR-YV-CMEGHDCVNTVANVS-SQSAIWKFDRFMCDLMHLMQN t47 L-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN t60 L-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN t30 L-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN t70 M--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-SQSAVWKFDRFMCDLMHLMQD t91 M--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-SQSAVWKFDRFMCDLMHLMQD t49 M--YNHFTGNKGACELIKG-YV-CMEGHDCVNTMANIS-SQSAVWKFDRFMCDLMHLMQD t52 L--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMAHDS-SQSATWKFDRFMCDLMHLMQG t43 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQK t1 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCCLMHLMQN t86 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQN t72 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKVS-SQSAVWKFDRFMCALMHLMQN t96 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQV t44 L--YQNFSGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCDLMHLMQN t45 L--YQNFTGQKGAREKIKR-YK-CMEGHDCVNTMAKSS-SQSAVWKFDRFMCDLMHLMQN t74 L--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCDLMHLMQN t26 L--YQNFTGKKGAREKIKR-YL-CMEGHECVVTMAKNS-SISAMWKFDRFMCDLMHLMQN t61 L--YQNFTGQKGAREKIKR-YL-CMEGHECVNTMAKNS-SVSAVWKFDQFMCDLMHLMQN t97 L--YQNFTGQKGAREKIKR-YL-CMEGHECVNTMAKNS-SVSAVWKFDRFMCDLMHLMQN t17 L--YQNFTGQKGAREKIKR-YL-CMEGQDCVNTMAKSS-SQSAVWKFDRFMCDLVHLMQN t11 L--YQNFTGQKGAREKIKR-YL-CMEGQDCVNTMAKSS-SQSAVWKFDRFMCDLVHLMQN t23 H--YTFFAGEKGARELIKR-YS-CMEGDGCVNTMAEGR-SESAVWKFDPFMCQLTHSMQS t76 I--VTNFHGF---AEMPKN-HD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG t53 I--VTNFHGF---AEMPKN-YD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG t16 I--VTNFHGF---AEMPKN-YD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG t42 I--WTNFHGF---AEMPKN-YD-VMEEQGGPGTCIAPRQSADTSWPFMVFTVDLAHRCRG t24 I--WTNFHGF---AEMPKN-YD-VMEEQGGPGTCVAPRQSADTNWPFMVFPVELAHRCRG t73 --------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLD t66 --------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLD t32 
--------GDIGRVKQDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLD t75 --------GDIGRVQQDVHKFPKLKEGAPNCSSFVKPVFMGCDMFHRQVQNRGNHGLVLD t79 --------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGVDMFHREVILKGAKGLVLP t27 --------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLP t93 --------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLP t59 --------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLP t7 --------GDIGRVVVDHPKFLKLEEGAPNCSSMPMPYFLGCDMFHREVILKGAKGLVLP t78 --------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLP t12 --------GDIGRVSVDHPKFLKLEEGAPRCSSCPMPYFLGCEMFHRQVILKGAKGLVLP t81 --------GDIGRVGVDHPKFLKLEEGAPNCSSLPMPYFLGVDMFHRQVTIKGAKGLVLP t21 --------GDIGRVDVDHPKFLKLEEGAPNCSSLPMPYFLGVDMFHKQVTLKGAKGLVLP t80 FGD-----GDIGRVVVDHPKFEKLEEGAPNCSSHPVPYFLGIDMFHKQVMAKGSKGLILP t14 FGD-----GDIGRVVCDHPKFEKLEEGAPNCSSHPMPYFLGVDMFHEQVMAKGSKGLILP t85 --------GDIGRVVCDHPKFEKLEEGAPNCSSIPMPYFLGIDMFHKQVMAKGSKGLILP t62 --------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHNQVMAKGSKGLILP t19 --------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHKQVMGKGSKGLILP t77 --------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHRQVMAKGSKGLILP t88 --------GDLGRVVQDNVIFAKLKEGAPHCISL--PHFLGIDMFHTQVFVGGH--LILP t37 --------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILP t35 --------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILP t48 --------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILP t55 --------GDLGRVVHDNWTFTKLKEGAPHCIAI--PYFMGIDMFHIQVFVGGN--LILP t46 --------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILP t67 --------GDLGRVVMDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFLGGD--LILP t57 --------GDLGRVVMDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFIGGD--LILP t56 --------GDLGRVLRDNTTFAKLKEGAPVCISL--PYFLGMDMFHQQVFMGGN--LILP t13 --------GDLRRVVHDNVTFSKLKEGAPHCISH--PYFLGIDMFHIQVYSKGF--LTLP t5 --------GDLRRVVHDNVTFSKLKEGAPHCISH--PYFLGIDMFHIQVYSKGF--LTLP t38 --------GDLGRVYYDNITFSKLKEGAPHCISQ--PFFLGIDMFHIQVYFKGS--LTLP t33 --------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLP t100 
--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLP t34 --------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLP t84 --------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLP t92 --------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLP t3 --------GDLGRVYWDNLTFQKLKEGAPHCIAV--PNYLNIDMFHIQVFYKGP--LTLP t22 --------GDLGRVYVDNQTFSKLKEGAPHCISA--PYYLNIDMFHIQVFYKGS--LTLP t64 --------GDLRRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHDQVVYRGP--LTLP t18 --------GDLRRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHEQVVYRGP--LTLP t68 --------GDLRRVFPDNAHFSKLKEGAPHVISH--PYFLGIDMFHDQVVYRGP--LTLP t28 --------GDLGRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHAQVFYRGS--LTLP t82 --------GDLGRVFPDNCTFSKLKEGAPHVISH--PYFLGIDMFHGQVVYRGS--LTLP t41 --------GDLGRVFPDNCTFSKLKEGAPHVISH--PYFLGIDMFHGQVVYRGS--LTLP t71 --------GDLARVFHDNGTFAKLKEGAPHVISV--PYFLGIDMFHGQVFLRGS--LTLP t94 --------GDLGRVFQDNATFSKLKEGAPHVISH--PYFLGIDMFHGQVFYRGS--LTLP t99 --------GDLGQVFPDVSTFSKLKEGAPHVISQ--PYFLGIDMFHDQVFYRGS--LTLP t40 --------GDLGRVYPDNPTFSKLKEGAPHVISH--PYFLGIDMFHDQVFYRGS--LTLP t90 --------GDLGRVYPDNPTFSKLKEGAPHVISH--PYFLGIDMFHDQVFYRGS--LTLP t4 --------GDLGRVFPDNATFSKLKEGAPHVISV--PYFLGIDMFHDQVFYRGS--LTLP t36 --------GDL--------------------CSY--PSFLGMCMFHAMVQSKGD--LVLP t87 --------GDL--------------------CSY--PSFLGMCMFHAMVQSKGD--LVLP t89 --------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLP t39 --------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLP t8 --------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLP t6 --------GDL--------------------CSH--PSFLGMCMFYAMVQSKGD--LVLP t58 --------GDL--------------------CSH--PSFLGMCMFHAMVKSKGN--LVLP t54 --------GDL--------------------CSH--PSFLGMCMFHAMVQSKGN--LVLP t65 --------GDL--------------------CNS--PSFLGMCMFHPQVQAKGE--LVLP t29 --------GDL--------------------CNC--PSFLGMCMFHPQVQAKGE--LVLP t98 --------GDL--------------------CNS--PPFLGMCMFHPQVQMKGE--LVLP t83 --------GDL--------------------CNS--PSFLGMCMFHPQVQAKGE--LVLP t95 
--------GDL--------------------CNP--PSFLGMCMFHPQVRSKGE--LVLP t20 --------GDL--------------------CNP--PSFLGMCMFHPQVRSKGE--LVLP t69 --------GDL--------------------CNS--PGFLGMCMFHPQVQRKGE--LVLP t2 --------GDL--------------------CNS--PGFLGMCMFHPQVQAKGE--LVLP t10 --------GDL--------------------CNS--PGFLGMCMFHPQVQAKGE--LVLP t31 --------GDL--------------------CNL--PSFLGMCMFHNQVQCKGE--LVLP t15 --------GDL--------------------CNS--PSFLGMCMFHNQVPNKGA--LVLP t63 --------GDL--------------------CNS--PSFLGMCMFHNQVSFKGE--LVLP t50 --------GDL--------------------CNS--PSFLGMCMFHNQVSVKGE--LVLP t25 --------GDL--------------------CNS--PSFLGMCMFVNQVSFKGE--LVLP t51 --------GDL--------------------CNS--PSFLGMCMFHVQVQFKGE--LVLP t9 --------GDL--------------------CSY--PSFLPMCMFVPQVLSKGE--LVLP t47 --------GDL--------------------CSY--PSFLPMCMFVSQVLSKGE--LVLP t60 --------GDL--------------------CSV--PSFLPMCMFVSQVLHKGE--LVLP t30 --------GDL--------------------CSV--PSFLPMCMFVSQVLHKGE--LVLP t70 --------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLP t91 --------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLP t49 --------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLP t52 --------GDL--------------------CSY--PSFLPVCMFVAQVINKGE--LVLP t43 --------GDL--------------------CSH--PSFLGMCMFHEQVRCKGE--LVLP t1 --------GDL--------------------CSH--PSFLGMCMFVEQVRCKGE--LVLP t86 --------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLP t72 --------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLP t96 --------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLP t44 --------GDL--------------------CSH--PSFLGMCMFHEQVRCKGE--LVLP t45 --------GDL--------------------CSV--PSFLGMCMFHEQVRCKGE--LVLP t74 --------GDL--------------------CSV--PSFLGMCMFHEQVRCKGE--LVLP t26 --------GDL--------------------CSF--PSFLGMCMFHDQVRCKGE--LVLP t61 --------GDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLS t97 --------GDL--------------------CSF--PSFLGMCMFHQQVRCKGE--LVLP t17 
---GARGSGDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLP t11 ---GARGSGDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLP t23 --------GDL--------------------CSV--PSFLGVCMFQRQVVMKGE--LVLP t76 --------GDMPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSIYDLKVSHKMI--QACN t53 --------GDIPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSTYDLKVSHKMI--QACN t16 --------GDIPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSTYDLKVSHKMI--QACN t42 --------GDTPTQAGDSMTFPHLAEGLPAVNS---RAFVGVSSYDLRVSHKMI--QACT t24 --------GDTP--------FPHLAEGLPAVNS---RAFIGVSSYDLKVSHKMI--QACN t73 DTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-H t66 DTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-H t32 DTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-H t75 DTWNEESGTFPYPQVHSRNEKRCINKLHQGDHNESCH---HNPRI----VR---RIGH-H t79 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGL-N t27 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGM-N t93 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGM-N t59 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGM-N t7 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGM-N t78 D--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIGM-N t12 D--NEDSMAFPYVQVHSRSVKRCLNVLVQGDHDESRH---HNARI----VA---RIGM-N t81 DIWNESPMAFPYAQVHSKSHKRCLNVLVQGDHEESEH---HNARI----MS---KIGT-H t21 DIWNESPMAFPYAQVHSKSHKRCLNVLVQGDHEESEH---HNARI----MS---RIGT-H t80 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDVEES-H---HSARC----VR---RIGF-H t14 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDVEES-H---HSARC----VR---RIGF-H t85 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---VSARC----MR---RIGF-H t62 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIGY-H t19 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIGYGH t77 DTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIGY-V t88 DPCYELSISVMYAGHASYNQKRCINNLDQGDQEDSNHRKEHKIRASVLLYR---QIGI-L t37 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t35 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t48 
DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t55 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t46 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t67 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L t57 DPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPQKEHKIRNSVLLYR---QIGL-L t56 DPCYELSISVMYAGHASYNQKRCLNNLDQGDREDSSHRKEHKIRRSVLLYQ---QIGC-L t13 DPRYEISMSVMYSQHHSFSMKRCLNGLDHGDREESPHQIEHKMRKSVLIYN---PIGY-L t5 DPRYEISMSVMYSQHHSFSMKRCLNGLDHGDREESPHQIEHKMRKSVLIYS---PIGY-L t38 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHHIDHKMRKSVLIYN---PIGY-L t33 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L t100 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L t34 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L t84 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L t92 DPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L t3 DPHYELSMSVMYAQHHSFSQKRCLNALDHGDREESPHQIEHKMRKSVLLYN---PIGY-L t22 DPRYELSMSVMYAQHHSFSQKRCLNALDYGDREESPHQIEHKMRKSVLLYY---TIGW-L t64 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L t18 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L t68 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L t28 DPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESSHGIEHNMRSSVLLYN---PIGF-L t82 DPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIGF-L t41 DPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIGF-L t71 DPRYELSMSVMYAQHHSFSQKRCLNHLDHGDRQESPHGIEHNMRKSVLLYN---PQGY-L t94 DPRYELSMSVMYAQHHSMSQKRCLNTLDHGDRQESPHGIEHNMRKSVLLYD---PIGY-L t99 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIGY-L t40 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---SIGY-I t90 DPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIGY-I t4 DPRYEMSMSVMYAQHHSFSQKRCLNALDYGDRQESPHGIEHNMRKNVLLDN---PIGH-L t36 DSPNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSEHIKEKHMRI----YA---NIGG-H t87 DSPNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHVKEKHMRI----YA---NIGG-H t89 
DSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H t39 DSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H t8 DSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H t6 DSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H t58 DSCNEDSVSFMYIQHHSFHEKRCLNHLNVGDREDSEHIKEKHMRV----YA---SIGG-H t54 DSCNEDSVSFMYIQHHSFHEKRCLNHLNVGDREDSEHIKEKHMRI----YA---SIGG-H t65 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V t29 DNPNENSVEFMYRRHHSLSDKRCLNLLFVGDSQDSQHQREKQLRI----YG---RIGK-V t98 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V t83 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V t95 DNPNENSVEFMYRRHHSLSDKRCLNLLYTGDSQDSQHLREKQLRI----YG---RIGR-V t20 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGR-V t69 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V t2 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V t10 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V t31 DNVNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YQ---NIGK-V t15 DNVNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YP---SIGK-V t63 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDEQDSSHLREKQLRL----YA---RIGK-V t50 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSSHLREKQLRL----YA---RIGK-V t25 DNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSSHLREKQLRL----YA---RIGK-V t51 DHPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YA---RIGK-V t9 DSSNEESISFVYLQHHSFSVKRCLNYLNVGDSADSKHLKEKHLRA----EGMPGNIGK-H t47 DSSNEESISFVYLQHHSFSDKRCLNYLNVGDNSDSKHLKEKHLRA----QGMPGNIGK-H t60 DSSNEESISFVYLQHHSFSDKRCLNYLNVGDNADSKHLKEKHLRA----QGMPGNIGK-H t30 DSSNEESISFVYLQHHSFSEKRCLNYLNVGDNADSRHLKEKHLRA----QGMPGNIGK-H t70 DSSTEESISFMYLQHHSFSDKRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIGQ-H t91 DSSTEESISFMYLQHHSFSDKRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIGQ-H t49 DSSNEESISFMYLQHHSFSDKRCLNCLNVGDNEDSKHLKEKFLRA----DG---GIGQ-H t52 DSSNEESISFMYLQHHSFSEKRCLNYLNVGDNEDSKHLKEKFLRA----DG---NIGQ-H t43 DSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGH-H t1 
DSPNEESLSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGR-H t86 DSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGR-H t72 DSPNEESVSFVYIQHHSFSDKRCLNCLNAGDNDDSEHLKEKHLRT----YG---QIGR-H t96 DSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRV----YG---QIGR-H t44 DSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHLKEKHLRI----YG---VIGK-H t45 DSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHLKEKHLRI----YG---EIGK-H t74 DSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHIKEKHLRI----YG---EIGK-H t26 DSKNEESVSFVYIQHHSFSDKRCLNYLNVGDNEDSEHLKEKHLRV----YG---NIGK-H t61 DSKNEESVSFVYIQHPSFSDKRCLNYLNVGDNEDSEHLKEKVLRV----YG---DIGK-H t97 DSKNEESVSFVYIQHHSFNDKRCLNYLNVGDNEDSEHLEEKHLRV----YG---NIGK-H t17 DSKNEESVSFVYIQHHSFSDKRCLNELNVGDNEDSIHLKEKHLRI----YG---NIGK-V t11 DSKNEESVSFVYIQHHSFSDKRCLNSLNVGDNEDSIHLKEKHLRI----YG---DIGK-H t23 DSADEGSVSFMYIQHHSFAAKRCLNVLNVGDNEDSVHLRHKQLRV----YG---KIGK-H t76 PIQN---------DNHAFSDQRCLNDLSQGVVGASFPKMQHGVRH----YK---K----- t53 PIQN---------DNHAFSDQRCLNDLAQGVVGASHPKMQHGVRH----YK---K----- t16 PIQN---------DNHAFSDQRCLNDLAQGVVGASHPKMQHGVRH----YK---K----- t42 LIQN---------DNVAFSDQRPLNDLNQGCVGTSHPKMPHGVRH----YK---K----- t24 PIQN---------DNVAFSDQRVLNDLSQGCVGTSHPKMPHGVRH----YK---K----- t73 LVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG t66 LVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG t32 LVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG t75 LVIIVLVNRDMGRLNYIEPDALVPSPLRFGGDEWVPKPI------------------EDG t79 LVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------GDG t27 LVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADG t93 LVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADG t59 LVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG t7 LVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG t78 LVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG t12 MVIIRYLNVEMGRLNHCEDDAMVYSPLRIGGAGTHSKPI------------------ADG t81 LVIINIINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADG t21 
LVIVNVINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADG t80 LVIIVFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDG t14 LVIIVYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDG t85 LVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------NDG t62 RVIINYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG t19 LVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG t77 LVIINFWNVEMGRLIHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG t88 -VIIKEANELMNRLNHKEPENGIIFPLR---DAQDPKQI------------------LNG t37 -VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG t35 -VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG t48 -VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG t55 -VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG t46 -VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG t67 -VIIKEANELMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNG t57 -VIIKEANEIMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNG t56 -VITRERNELMGRLNHKEPRDGVIFPHR---GAQGPKQL------------------ANG t13 -VIIKDANSMMGRLNHHESRAVVAFPLR---GAEGPKQV------------------MEG t5 -VIIKDANSMMGRLNHHESRAIVAFPLR---GAEGPKQV------------------MEG t38 -VIIKNANSLMGRLNHHESRLIVTFPLR---GAEGPKQV------------------MEG t33 -VIIKNANSLMGRLNHYESRAIVTFPLR---GAEGPKQV------------------MEG t100 -VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG t34 -VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG t84 -VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG t92 -VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG t3 -VIIKNANSMMGRLNHHESCAIVMFPLR---GAEGPKQI------------------IEG t22 -VIIKNADSMMGRLNHHESCAMVMFPLR---GAEGPKQI------------------MEG t64 -VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEG t18 -VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEG t68 -VIIKNINSMMGRLNHYESRAVVGFPLR---GAENPKQI------------------MEG t28 -VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------VEG t82 
-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEG t41 -VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEG t71 -VIIKNVNSMMGRLNHHESRAVVTFPLR---GADNPRQI------------------MEG t94 -VIIKNVNSMMGRLNHHESRAKVTFPLR---GADNPRQI------------------MEG t99 -VIIKNINSMMGRLNHHESRAVFAFPLR---GAENPKQI------------------MEG t40 -VIIKNLNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEG t90 -VIIKNVNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEG t4 -VIIKNENSMMGRLNHHESRAVVTFPLR---GAEDPKQK------------------MEG t36 -IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDG t87 -IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDG t89 -IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG t39 -IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG t8 -IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG t6 -IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG t58 -IIIREWNDVMGRLNHIEPGAEVTFPLRR--RGQASKPV------------------IDG t54 -IIIREWNEVMGRLNHIEVGAEVTFPLRR--RGQASKPV------------------IDG t65 -VIIKVWNAVMGRLNHHEPPAEVMFPLRK--GGQDSKPF------------------IDG t29 -VIIKVWNAAMGRLNHVEPPAEVMFPLRR--GGSDSKPF------------------LDG t98 -VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSDSKPF------------------IDG t83 -VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSCSKPF------------------VDG t95 -VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKPV------------------YDG t20 -VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKAV------------------YDG t69 -VIIKVVNAVMGRLNHVEPSAEVMFPLRK--GGSDSKPF------------------IDG t2 -VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDG t10 -VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDG t31 -VIIKVWNANMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDG t15 -VIIKVWNAAMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDG t63 -VIIKVWNAVMGRLNHHEPPAAVMFPLRK--GGSDSKPF------------------IDG t50 -VIIKVWNAVMGRLNHHEPPAEVMFPLRK--CGSDSKPF------------------IDG t25 -VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDG t51 
-VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDG t9 -VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDG t47 -VIINVWNAVMGRLNHHESPAEVVFPLRQ--GGPDSKPM------------------IDG t60 -VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDG t30 -VIISVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------MDG t70 -VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDG t91 -VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDG t49 -VIINMWNAVMGRLNHHEPPPEVVYPLRQ--GGPNSKPM------------------VDG t52 -VIINMWNAVMGRLNHHEPPAEVNFPLRQ--GGPDSKPM------------------VDG t43 -VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSK-M------------------VDG t1 -VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPV------------------VDG t86 -VIINVWNAFMGRLNHHEPPAEAVYPLRK--GGPDSKPM------------------VDG t72 -VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPM------------------VDG t96 -VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKLM------------------VDG t44 -VIINVWNAFMGRLNHHEPPADVEYPLRK--GGPDSKPM------------------VDG t45 -VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDG t74 -VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDG t26 -VIINSWNAMMGRLNHHEPAAEVVYPLRK--GGPESKPM------------------VDG t61 -VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDG t97 -VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDG t17 -VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDG t11 -VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDG t23 -VIISVPNAIMGRLNHHEPSAIVVFPLRQ--RGADSKSV------------------GDG t76 ---RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR t53 ---RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR t16 ---RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR t42 ---KVYENEVGDRLDDAEGIDDVGVWMRW--NGVKHAWCHIIGSCPDHLVCSLLVLQPGR t24 ---RVYENEVGDRLDDAEAVDDVGVWMRW--AGVKVAVCHVIGSCPDHLVVSLLVLQPAR t73 FFDK-D-Q------SRPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NI t66 FFDK-D-Q------SRPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NI t32 
FFDK-D-Q------ARPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NI t75 FFDK-D-Q------ARPDVANA-IVPDR----DQWVGAHKEWGSSLCKVGLAVHDLR-NV t79 FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t27 FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t93 FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t59 FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t7 FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t78 FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NI t12 FFDK-D-Q------ARPLIANAHIVPEN----DQWTGAVAQWCSSVIKCGLANHDLE-NI t81 FFDK-D-Q------ARPIIANAHIVPVN----DQWTGPMAQWCSSVIKCGLANHELE-DV t21 FFDK-D-Q------ARPIIANAHIVPIY----DQWTGPMAQWCSSVIKCGLANHELK-VV t80 FFDK-D-Q------ARPVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NV t14 FFDK-D-Q------ARPVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NV t85 FFDK-D-Q------ARPVIANAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NV t62 FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NV t19 FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NV t77 FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMPQWVSSVIKCGLANHDLE-NV t88 LFDK-E-E------NRPMVQDADSVVGS----AQWAGQHRSWCSSDDKA----------- t37 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t35 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t48 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t55 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t46 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t67 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t57 LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA----------- t56 LFEK-E-E------NRPMVQDAGSVPER----AQWPGQQRAWCSSDDKA----------- t13 LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t5 LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t38 LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t33 LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t100 
LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t34 LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t84 LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t92 LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t3 LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t22 LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA----------- t64 HFDK-E-E------ARPIVQDAASVPGK----AQWVGRVRAWCSSDVKA----------- t18 HFDK-E-E------ARPIVQDAASVPGK----AQWVGRVRAWCSSDVKA----------- t68 HFDK-E-E------ARPIVQDAASVPEK----AQWVGRVRAWCSSDVKA----------- t28 HFDK-E-E------ARPIVKDAASVPGK----AQWVGRIRAWCSSDVKA----------- t82 HFDK-E-E------ARPIVQDAASVPGE----AQWVGHIRAWCSSDIKA----------- t41 HFDK-E-E------ARPIVQDAASVPGE----AQWVGHIRAWCSSDIKA----------- t71 HFDK-E-E------ARPVVQDAPGVPGK----AQWVGKVRAWCSSDVKA----------- t94 HFDK-E-E------ARPVVQDAPGVPEQ----AQWVGKMRAWCSSDVKA----------- t99 HFDK-EKE------ARPVVQDAPGVPGK----AQWVGRIRAWCSSDVKA----------- t40 HFDK-E-E------ARPVVQDAPGVPGK----AQWVGRVRAWCSSEIKT----------- t90 HFDK-E-E------ARPVVQDAPGVPGK----AQWVGRVRAWCSSEIKT----------- t4 SFDK-E-E------ARPVVQDAPGVPGR----AQWVGRVRPWCSSDVKA----------- t36 FFAK-D-E------DRPGIQNAVSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t87 FFAK-D-E------DRPGIQNAMSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t89 FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t39 FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t8 FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t6 FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEP t58 FFAK-D-E------DRPGIQNAMSVPCG----DQWVGSVRGWCSSQHRYGLAIHILVHQQ t54 FFAK-D-E------DRPGIQNAMSVPCG----EQWVGSVRGWCSSQHRYGLAIHILVHQQ t65 FF------------DRPGIANAMSVKCG----DQWVGSIRGWCSSQHQYGLANHILVHGP t29 FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHGP t98 FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHNP t83 FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHNP t95 
FF------------DRPAIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGP t20 FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGP t69 FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGP t2 FF------------DRPGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGP t10 FF------------DRPGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGP t31 FF------------DRPGIANAMNVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGP t15 FF------------DRPGIANAMSVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGP t63 FF------------DRPGIANAMSVPCG----DQWMGIIRGWCSSQHQYGLANHILVHGP t50 FF------------DRPGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHVLVHGP t25 FF------------DRPGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHILVHGP t51 FF------------DRPGIANAMSVPCG----DQWVGVIRGWCSSQHQYGLANHILVHGP t9 FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDP t47 FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDP t60 FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHNP t30 FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHTP t70 FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDP t91 FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDP t49 FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHRFGLAFHTLKHDP t52 FFDK-D-D------DSPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLASHTLSHDP t43 FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVNFGLANHTLVQ-P t1 FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-P t86 FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-P t72 FFDK-E-D------HRPGIANAVSVPCA----DQVGGPLRGWCSSQVKFGLANHTLVQ-P t96 FFDK-E-D------ERPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-P t44 FFDK-E-D------DRPGIANAVSVPCG----DQVGGPIRGWCSSQVKFGLANHTLAQNP t45 FFDK-E-D------DRPGIANAVSVPCT----DQVGGPIRGWCSSQVKFGLANHTLVHNP t74 FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVHNP t26 FFDK-E-D------DRPGIANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLIHIP t61 FFDK-E-D------DRPGVANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHIP t97 FFDK-E-D------DRPGVANAMSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHVP t17 
FFDK-E-D------DRPGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVP t11 FFDK-E-D------DRPGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVP t23 FYDK-D-E------DRPGIANAVSVPVA----EQWVGSVRGWYSSEHKYGLANHILI-GP t76 NFIQLS-MVRDTPHRLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQ t53 NFIQLS-MVRDTPHGLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQ t16 NFIQLS-MVRDTPHGLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQ t42 NFIQLS-VVHDTPHRLPKMASAAAVFKQTCHLEQVPGTARMWGKSGPRVRLNGHVLK-NQ t24 NFIQLS-VVHDTPHRLPKMASAAAVFKQTCHLEQVAGTARVWGKSGPRVKLNGHVLK-NQ t73 SDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANGL----WHPFYKASDHKNEAQ t66 SDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANGL----WHPFYKASDHKNEAQ t32 SDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANGL----WHPFYKASDHKNEAQ t75 SDLYRCVWMECVLGANHYQQVSRMKMVTPAYTHMTEYANGL----WHPFYKNPDHKNEAQ t79 SRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHGL----WKPFYQASDHKNEAQ t27 SRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHGL----WKPFYQASDHKNEAQ t93 SRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHGL----WKPFYQASDHKNEAQ t59 SRLLRCVVMECILGANVYHQISRMKMHGPTYSHVMEYAHGM----WKPFYQASDHKNEAQ t7 SRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHGM----WKPFYQASDHKNEAQ t78 SRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHGM----WKPFYQASDHKNEAQ t12 SRLLRCVVMECVLGANVYHQASRMKMHGPYYAHVTEYAHGL----WKPFYQTSDHKNEAQ t81 SRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMTEYAVGL----WKPFYQASEHKNEAQ t21 SRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMTEYAHGL----WKPFYQASEHKNEAQ t80 SRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMTEYAHGL----WKPFYQASDHKNDAQ t14 SRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMTEYAHGL----WKPFYQASDHKSDAQ t85 SRLLRCVEIDCNQGANHYVQISRMKMYGPTYSHMTEYAHGL----WKPFYQASDHKNDAQ t62 SRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMTEYAHGL----WKPFYQASDHKNDAQ t19 SRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMTEYAHGL----WKPFYQASDHKNDAQ t77 SRLLRCVVIDVNQGANHYVQISRMKMHGPTYSHMTEYAHGL----WKPFYQASDHKNDAQ t88 SQLPRNTHIVVEIGANVYEQFSRMKTNIPIYAHVTEYAVGV----ERPFYE-SEFKNEAQ t37 SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVGV----ARPFYD-AEFKNEAQ t35 SQLPRNTRIVAEIGANVYEQYSRMKTVIPIYARVTEYAVGV----PRPFYD-AEFKNEAQ t48 
SQLPRNTRIVAEVGANVYEQYSRMKTVIPIYARVTEYAVGV----PRPFYD-AEFKNEAQ t55 SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVGV----ARPFYD-AEFKNEAQ t46 SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVGV----ARPFYD-AEFKNEAQ t67 SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVGV----ARPFYD-AEFKNEAQ t57 SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVGV----ARPFYD-AEFKNEAQ t56 SKLPRNTKMVAEIGANVYEQASRMKTNIPIYAHVTEYALGV----DRPFYD-SEFKNEAQ t13 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-NEWKQEAQ t5 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-NEWKQEAQ t38 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t33 SHLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t100 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t34 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t84 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t92 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVGI----VKPFYD-SEWKQEAQ t3 SRLPRNDPVVHEVGANVYQQISRMKTSIPIYAHVTEYAVGI----IKPFYD-NEWKQEAQ t22 SQLPTNDPIVPEVGANVYQQVSRMKTSIPIYAHVTEYAVGI----VKPFYD-NEWKQEAQ t64 SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVTEYAVGI----VKPFYD-NEWKQEAQ t18 SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVTEYAVGI----VKPFYD-NEWKQEAQ t68 SKLPRNDPIVPEVGANVYQQISRMKTNIPIYAHVTEYAVGI----VKPFYD-NEWKQEAQ t28 SQLPRNDPIVPEVGANVYQQDSRMKTNIPIYAHVTEYACGI----VKPFYE-NEWKQEAQ t82 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTTEYACGI----VKPFYE-NEWKQEAQ t41 SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTTEYACGI----VKPFYE-NEWKQEAQ t71 SQLPRNDTIVPEVGANVYQQVSRVKTHIPIYAHVTEYAVGI----VKPFYN-NEWKQEAQ t94 SQLPRNDTIVPEVGANVYQQVSRVKTNIPIYAHVTEYAVGI----VKPFYN-NEWKQEAQ t99 SELPRNDAIVPEVGANVYQQVSRMKTNIPIYAHATEYAVGI----VKPFYD-NEWKQEAQ t40 SELPRNDAIVVEVGANVYQQISRMKTDIPIYAHATEYAVGI----VKPVYD-NEWKQEAQ t90 SELPRNDAIVPEVGANVYQQISRMKTDIPIYAHATEYAVGI----VKPVYD-NEWKQEAQ t4 SELPRNDAIWPEVGANVYQQISRMKTNIPIYPHVTEYAVGI----VKPFYL-NEWKQEAQ t36 SKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t87 SKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t89 
SKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t39 SKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t8 SKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t6 SKLHKTYKVVRKVGANAYDQVSRMKAAAPVYPHIMEYAHGM----FNPFYGLSEPKNNAQ t58 SRLHKTYNIDRKNGANAYEQDSRMKAGAPVYPHIMEYAHGM----FNPFYGMSEPKNNAQ t54 SRLHKTYNIDRKNGANAYDQDSRMKAGAPVYPHIMEYAHGM----FVPFYGMSEPKNNAQ t65 SKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t29 SKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHGM----VKPFYGQSEPKNEAQ t98 SRLHRTYAINKKMGANVYQQSSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t83 SRLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t95 SKLHRTYAINRKVGANVYQQTSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t20 SKLHRTYAINRKVGANVYQQMSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t69 SKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t2 SKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t10 SKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t31 SKLHRTYAINKKMGANVYQQTSRMKAAAPIYPHIMEYAHGM----VKPFYGISEPKNEAQ t15 SKLHRTYAINRKMGANVYQQTSRMKAAAPLYPHIMEYAHGM----VEPFYGISEPKNEAQ t63 SKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t50 SKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t25 SKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t51 SKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHGM----VKPFYGISEPKNEAQ t9 SSLHRTFAINRKMGANVYQQTSRMKASAPFYPHCMEYAHGV----CKPFYEHSEPKNEAQ t47 SNLHRTFAINRKMGANVYQQASRMKASAPFYHHCMEYAHGV----CKPFYEHSEPKNEAQ t60 SNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCMEYAHGV----CKPFYEHSEPKNEAQ t30 SNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCMEYAHGV----CKPFYEHSEPKNEAQ t70 SKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVMEYAHGV----CKPFYEHSEPKNEAQ t91 SKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVMEYAHGV----CKPFYEHSEPKNEAQ t49 SKLHRTFAINEKMGANVYQQNSRMKASAPFYPHVMEYAHGV----CKPFYEHSEPKNDAQ t52 SKLHRTFGINKNMGANVYQQTSRMKANAPFYPHVMEYAHGV----CKPFYEHSEPKNEAQ t43 SKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t1 
SKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t86 SKLHRTFKINKKMGANVYHQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t72 SKLHRTFKINKKIGANVYQQTSRMKADAPVYPHIMEYAHGV----YKPFYERSEPKNEAQ t96 SKLHRTFKINKKIGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t44 SKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYDRSEPKNEAQ t45 SKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t74 SKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIMEYAHGV----CKPFYERSEPKNEAQ t26 SKLHRTFAINKKMGANVYQQTSRMKAAAPVYSHVMEYAHKV----VKPFYARSEPKNEAQ t61 SKLHRTFAITKKMGANVYQQTSRMKATAPVYPHVMEYAHGV----VKPFYDRSEPKNEAQ t97 SKLHRTFPINKKMGANVYQQTSRMKATAPVYPHVMEYAHGV----IKPFYDRSEPKNEAQ t17 SLLHRTFAINRKMGANVYQQTSRMKAAAPVYPHMMEYAHGV----VKPFYERSEPKNEAQ t11 SLLHRTFAINKKMGANVYQQTSRMKAAAPVYPHMMEYAHGV----VKPFYERSEPKNEAQ t23 SKLHRTYQTTAKMGANVVKATNRMKRPQPVYPHVMEYANGV----VKPFYEVAESKNEAQ t76 NTLHRQYLVDVELQATMVFVAARMKTDSSMYMSVREILHG------------CGIKDEAQ t53 NTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVREILHG------------CGIKDEAQ t16 NTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVREILHG------------CGIKDEAQ t42 NILHRQYLVDVNLGATMVFVLARMKTDASMYMSHREILHGGTFDAAKPFRQICGTKDEAQ t24 NVLHRQYLVDVDLGATMVFVAARMKTDASMYMSHREILHGGTFDAAKPFRQICGVKDEAQ t73 GVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS- t66 GVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS- t32 GVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS- t75 GVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVAKPVLGDWEGCKCRSD t79 GVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSD t27 GVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDGCRCRSD t93 GVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDGCRCRSD t59 GVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSD t7 GVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSD t78 GVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSD t12 GVGES-THQLPCVESQTCQYEVKHAKVCKLVHH-ALYLLALHTMMKPRLGDIDGCRCRSE t81 GLGER-PHQLPCVESQTCQYEIKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDGIRCRSE t21 
GVGER-PHQLPCVESQKCQYEVKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDGVRCRSD t80 GVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQGCRCRSQ t14 GVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQGCRCRSQ t85 GVGER-PHQLPCVESQTCQYEVKHANVCKLVHA-ALYLIGLKTVVKPELGVWEGCRCRSQ t62 GVGER-PHQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQTVVKPKLGVWEGVQCRSQ t19 GVGER-THQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQNVVKPKLGVWEGVQCRSQ t77 GVGER-VHQLPCVEPQTCQYEVKHAKVCKHVHG-ALYLIGLQTVVKPKLGVWEGCQCRSQ t88 GWGES-GTSIPCVDSPDVQYEMKVAWVDKLMHT-ALYLMPLATVHKPEMGTVRGERCRAI t37 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTI t35 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPQMGTVKGERCRTI t48 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTI t55 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLSTVHKPEMGTVKGERCRTI t46 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTI t67 RQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTI t57 GQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTI t56 GQGES-GTSIPCVDSPDVQYELKHAWVDKLMHT-ALYLMPLPTVHKPKMGTVKGERCRAM t13 GQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKGNRCRAI t5 GQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKGNRCRAI t38 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTIKGNRCRAI t33 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHN-SLYLMHIDTCHKPAMGTVKGNRCRAI t100 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAI t34 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAI t84 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAI t92 GQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAI t3 GQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLGTCHKPVMGTVKGDRCRAI t22 GQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLKTCHKPVMGTVKGDRCRAI t64 GVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t18 GVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t68 GVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t28 GQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t82 
GQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t41 GQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAI t71 GQGEA-GASIPCVDSPDDQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKGGRCRAS t94 GQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKGGRCRAS t99 GQGET-GASIPCVDSPDVQYEMKHAEVKKLMHD-SLYLMHIETCHKPIMGTVKGDRCRAT t40 GQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAT t90 GQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAT t4 GQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLWHLETCVKPIMGTVKGDRCRAT t36 GNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINIMSKPAMGEWVGNRCRNE t87 GNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINVMSKPAMGEWVGNRCRND t89 GNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNE t39 GNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNE t8 GNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHVNIMSKPAMGEWVGVRCRNE t6 GNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNE t58 GNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVGNRCRNE t54 GNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVGNRCRNE t65 GNGEN-PMNVPSVESGECQYEHKHAAMEKLMHH-SLYLMRVNTMSKPVMGDWVGNRCRAE t29 GNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMPKPVMGDWDGNRCRAE t98 GNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t83 GNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t95 GNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGHRCRAE t20 GNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGHRCRAE t69 GNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t2 GNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t10 GNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t31 KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAE t15 KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVLGDWVGNRCRAE t63 KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAE t50 KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAE t25 KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAE t51 
KNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPIMGDWVGNRCRAD t9 GNGEK-PMNVPCVESIDCQYENKHATMTKLMHH-SLYLMRMHTMSKPIMGDVNGNRCRAD t47 GNGEK-PMNVPCVESRDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGHVNGNRCRAD t60 GNGEK-PMNVPCVESIDCQYESKHAAMTKLMHQ-SLYLMRIHTMSKPIMGDVNGNRCRAD t30 GNGEK-PMNVPCVESIDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGDVNGNRCRAD t70 GNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQDSLYLMRMHTMSKPLMGDVNGNRCRAD t91 GNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMHTMSKPLMGDVNGNRCRAD t49 GNGDKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMYTMSKPIMGDVNGNRCRAE t52 GNGEK-PMNVPCVESIECQYENKHATMAKLMHD-SLYLMRMHTMSKPVMGDVNGHRCRAD t43 GNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t1 GNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAK t86 GNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t72 GNGEK-QGNVPCVESVECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t96 GNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t44 GNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGYVDGHRCRAG t45 GNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t74 GNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAG t26 GNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLMRMQTMAKPPMGDVDGHRCRAG t61 GNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDGHRCRAG t97 GNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDGHRCRAG t17 GNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDGVRCRAG t11 GNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDGVRCRAG t23 GNGEK-PVNVPCVESPDCQYESKHARVNKLMHP-SLYLMCMEAMNKPIMGDWDGNRCRSP t76 ADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKND t53 ADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKND t16 ADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKND t42 PDGRR-PATLLVVLSKDNQGITEHGAVLKHVHP----------LCKKDCNNNRCLRCKNV t24 PDGQR-PATLLVVLSKDNVGITEHGAVLKHVHP----------LCKKDCNNNRCLRCKNV t73 --KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKN t66 --KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKN t32 
--KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKN t75 LIKLYVSQLDH----KTLSK-NMGLY--MRNRTWLATSPLQ------IGCIFMLVGRKKN t79 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t27 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t93 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t59 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t7 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t78 LNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQ t12 LNKLALSQLDD----KTLSK-NCFQY--LGNMTWLATSPLH------VGCMIIF------ t81 LNKLYLSQLDD----KTLSK-NCFIY--LGNMTWLATSPLQ------IGWLVLFDGRKRQ t21 LNKLYLSQLDD----KTLSK-NCFRY--LGNMTWLATSPLQ------NGCVVIFAGRKRQ t80 LSKLVLSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQ t14 LSKLVLSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQ t85 LSKLALSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIILFGGRKRQ t62 LSKLILSQLDD----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIIMFDGRKRQ t19 LSKLILSQLDD----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIILFDGRKRQ t77 LSKLILSQLDD----KTLSK-NCNIY--LGNMTWLATFPLQ------IGCIILFDGRKRQ t88 L-KLLMMQLD------TLSR-NQLPK--LCQGTWLDASPLQ------IGVQVMLVGKKGG t37 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t35 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t48 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t55 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t46 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVKVVLVGKKRG t67 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t57 L-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRG t56 L-KLNMMQLDE----KTLSQ-NQIIK--LCQRTWLDASPLQ------IGVNCVLPGKKGG t13 L-KLSMIQLDQ----KTLSQ-NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVG t5 L-KLSMIQLDQ----KTLSQ-NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVG t38 L-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVG t33 L-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVG t100 
L-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVG t34 L-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVG t84 L-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVG t92 L-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVG t3 L-KLSMIQLDQ----KTLSQ-NVRAR--LCQRTWLDTSPLQ------MGMVAVLLGKKLG t22 L-KLKMIQLDQ----KTLSQ-NQAAR--LCKRTWLDTSPLQ------MGMNVVLIGQKLG t64 L-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAP t18 L-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLKGKKAP t68 L-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAP t28 L-KLSMIQLDE----KTLSQ-NQRPG--LCQRTWLDANPLQ------MGMNTMLVGKKTP t82 L-KLNMIQLDQ----KTLSQ-NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTP t41 L-KLNMIQLDQ----KTLSQ-NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTP t71 L-KLKMIQLDQ----KTLSQ-NQQRR--LCQRTWLDSSPLQ------MGMKIMLQGKKTP t94 L-KLNMIQLDQ----KTLSQ-NQQRR--LCQRTWLDASPLQ------MGMKIMLQGKKIP t99 L-KLKMIQLDQ----KTLSQ-NQKRR--LCQRTWLDASPLQ------MGMKMMLQGKKTP t40 L-KLKMIQLDQ----KTLSQ-NQKKR--LCQCTWLDACPLQ------MGMKIMLQGKKTP t90 L-KLKMIQLDQ----KTLSQ-NQKKR--LCQCTWLDACPLQ------LGMKIMLQGKKTP t4 L-KLKMIVLDP----KTLSQ-NQKRR--LCQRTWLDASPLQ------VGMKIMLQGKKTP t36 LTALRIIQLDVGVSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVMSVEKKEA t87 LTALRIIQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCLVMSVEKKEA t89 LTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEA t39 LTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEA t8 LTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEA t6 LTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEA t58 LTALHVVQLDVGFSGKTLGQ-NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEA t54 LTALHIVQLDVGFSGKTLGQ-NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEA t65 LTALQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIA t29 LTALQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIA t98 LTALQTVQLDVGSSEKTLGQ-NIVTSKRLGDRTWLAASPLQ------IGCGVMVQEKKIA t83 LTALQTVQLDLGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGIMVQEKKIA t95 
LTELQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIA t20 LTELQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAAPPLQ------IGCDVMVQEKKIA t69 LTALKTGQLDVGSSGKTLGQ-NIMTSKKLGDRTWLAANPLQ------IGRGVMVWEKKVA t2 LTALKTGQLDVGSSGKTLGQ-NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVP t10 LTALKTGQLDVGSSGKTLGQ-NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVP t31 LTALKTVQLDVGSNGKTLGK-NIMTPKKLGDRTWLAASPLQ------IGCGVMVQEKKVA t15 LTALKTVQLDIGNSGKTLGQ-NIMTPKRLGERTWLAASPLQ------IGCGVVVQEKKVA t63 LTALKTVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVA t50 LTALKTVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVA t25 LTALKIVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVA t51 LTALKTVQLDVGSSGKTLGQ-NIMTPKNLGDKTWLAASPLQ------IGCGIMVQEKKVA t9 LTALKMLQLDIGFSAKTLGQ-NVVVPHLLGHRTWLATSPLQ------IGCGVMIFGNKIG t47 LTALKMLQLDIGFSAKTLCQ-NIVVAHLLGHRTWLATSPLQ------IGCMVMIFGNKIG t60 LTALKMLQLDIGFSAKTLGQ-NIVVPHCLGHRTWLATSPLQ------IGCMVMIFGNKIG t30 LTALKMLQLDIGFSAKTLGQ-NIIVPHCLGHRTWLATSPLQ------IGCMVMIVGNKIG t70 LTSLKMLQLDIGFCAKTLGQ-NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIG t91 LTSLKMLQLDIGFCAKTLGQ-NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIG t49 LTSLKMLQLDIGFSAKTLGQ-NIVPPKLLGHRTWLATSPLQ------IGCGVMIFGNKIG t52 LTALKMLQLDIGFSAKTLGQ-NIVAPKLLGVRTWLATSPLQ------IGCGVMIFGNKIG t43 LTALKVVQLDIGFSSKTLGQLNIIDCKLLGHRTWLATSPLQ------IGVDVMVMGNKIG t1 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGRHTWLATSPLQ------IGCDMMVMGNKIG t86 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIG t72 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIG t96 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVIGNKIG t44 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDIMVMGNKIG t45 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIG t74 LTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVVGNKIG t26 LTALKIVQLDIGFSQKTLGQ-NIIDSKHLGHRTWLATSPLQ------IGCSAMVMGNKIG t61 LTALKIVQLDIGFSSKTLGQ-NIIHSKLLGHRTWLATSPLQ------IGCDVMVMGNKIG t97 LTALQIVQLDIGFSAKTLGQ-NIIHSKLLGHRTWLATSPLQ------IGCDVVVMGNKIG t17 
LTALKIVQLDIGFSAKTLGQ-NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIG t11 LTALKIVQLDIGFSAKTLGQ-NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIG t23 LCLLKVIQLDMGVSGKTLGQ-NIVMAKLLGERTWLATSPLQ------IGCDVVAVGKKPE t76 LTGDQLVQLAD----NTCAW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP------- t53 LAGDQLVQLAD----NTCAW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP------- t16 LAGDQLVQLAD----NTCLW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP------- t42 LAGNQLIQLSD----VTCAW-NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP------- t24 LAGNQLVQLAD----ITCAW-NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP------- t73 SEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDD t66 SEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKRKNPQEMRG------NNVMSESLGDD t32 SEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDD t75 SEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRK------NNVISESLGDD t79 SAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t27 SAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t93 SAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t59 SGN-NYNQAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t7 SGN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t78 SGN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD t12 ------------------------------------------V------NNVLSQSLGDD t81 SAE-DYNRAADPTTISFCRKPIVQH-ADVFGCDRDKDPQEMRT------NNVISESLGDD t21 SAE-DYNRAADPTTIAFCRKPIVQH-ADVFGCDKDKDPQEMRM------NNVISESLGDD t80 SHGLNYNRAADPTTICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDD t14 SHGLNYNRAADPTTICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDD t85 SHGLNYNRAADPTMICFYKKPIVVQGADIFGCARGKDPQEMKS------NNVISGSLGDD t62 SHGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD t19 SHGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD t77 SYGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD t88 SKK-EYELAADQVIIYFYQAPIIYVKADVFSGTVAKKAQAMR-------KSTGSQSIGDD t37 SKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD t35 SKK-DYELAADPVIIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD t48 
SKK-DYELAADPVIIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD t55 SKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD t46 SKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD t67 SKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDD t57 SKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDD t56 SNK-DYELAADPVIIYFYQAPIIHHKADVFSGTVAKKAQAMRQ------QSTGSQSVGDD t13 SKK-DYEVAADPVIMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDD t5 SKK-DYEVAADPVIMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDD t38 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t33 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t100 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t34 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t84 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t92 SKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD t3 SKK-DYEIAADPVIKYFYQAPIIMRRTDVFEGTDDQKAQAMRV------RSTASQSIGDD t22 SKH-DYEIAADPVIMYFYQAPIIMRRTDVFEGTDDQKAQAMRL------RSTASRSIGDD t64 SKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD t18 SKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD t68 SKK-NYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD t28 SKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTNNKKSQAMRV------QSTSSQSIGDD t82 SKK-DYEIAADPVIMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDD t41 SKK-DYEIAADPVIMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDD t71 SKK-DYEIAADPAIMHFYRAPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDD t94 SKK-DYEIAADPAIMYFYQATIIMRRTDVFEGTNNKKAQAMRM------RSTASQSIGDD t99 SKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDD t40 SKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDD t90 SKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDD t4 SKK-DYEIAADPVIMYFYQSPIIMRHTDVFEGTNNKKAQAMRL------RSTASQSIGDD t36 SPK-EFEVAADPTVIYFYRNLIIQKITDVLSAVRMESPQEMRT------QDVNSSSLGDN t87 SGK-EFEVAADPTVIYFYRNLIIAKITDVLSAVRMESPQEMRE------QDVNSSSLGDN t89 
SPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN t39 SPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN t8 SPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN t6 SPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN t58 SPK-EFEVAADPTVIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDN t54 SPK-EFEVAADPTVIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDN t65 SPQ-EVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDD t29 SPQ-EVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDD t98 SPQ-EVEVAAD---IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSASLGDD t83 SPQ-EVEVAAD---IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSISLGDD t95 SPQ-QVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRCRKLVCIQKVDSPSLGDD t20 SPQ-QVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPKLVCIQKVDSPSLGDD t69 SPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD t2 SPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD t10 SPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD t31 SPQ-EVE-AAD---IFFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQYVDSPSLGDD t15 SPQ-EVEVAAD---IYFYRNMVIQTLTDVVAAVRMQSPQEMRPPVLVCIQDVDSVSLGDD t63 SPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD t50 SPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD t25 SPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD t51 SPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQDVDSPSLGDD t9 SAN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD t47 SAN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRE------QDVDSHSLGDD t60 SEN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD t30 SEN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD t70 STQ-EFEAAADPTVIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDD t91 STQ-EFEAAADPTVIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDD t49 STQ-EFESAADPTVIYFYRNRIVKRMADVISTVRMKSPQEMRG------QDVDSHSLGDD t52 STQ-EFEAAADPTVIYFYRNRIIRRVADVISTVRMKSPQEMRE------QDVDSHSLGDD t43 SPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKSPQEMRH------QDVDSPSLGDD t1 
SPS-EFEVAADPTIIWFYRDCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD t86 SPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD t72 SPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRH------QDVDSPSLGDD t96 SPS-EFEIAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD t44 SPS-EFEVAADPTIIWFYRNCIVHKLADVVSTAKMKSPQEMRV------QDVDSPSLGDD t45 SPA-EFEVAADPTIIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDD t74 SPA-GFEVAADPTIIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDD t26 SPQ-EVELAADPTVIYFYRNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD t61 SPQ-EVELAADPSVIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD t97 SPQ-EVELAADPSVIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD t17 SPQ-EFELAADPTVIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDD t11 SPQ-EFELAADPTVIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDD t23 SPQ-EFECAADPTVIYFYKNLIIQQAADYVSAVQVKSPQEMRY------QDVNSPSDGDE t76 SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED t53 SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED t16 SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED t42 SK-------VEPKCAQFYSKSCTHH----------------MM------CSVGSNASEED t24 SK-------VEPKCQQFYSKSCTHC----------------VM------CSVGSNASEED t73 NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---ED t66 NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---ED t32 NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---ED t75 NLQD--MPGMPVTVCTSSVMVRKDMHQ-SVDKRGYTWHAKE-DHLCPDSFCKGEK---EE t79 SLQQ--MPAVPVDVCVMSVMVNKSMPQ-SHDKRGYTWQAKQ-DHLFPVNVVKGEQ---EE t27 SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNCVKGEQ---EE t93 SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNCVKGEQ---EE t59 SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EE t7 SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EE t78 SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EE t12 SLQQ--MHAMPVTVVVVSHMVKKSMPQ-SHDKRGYTWQAKDFDQLIPVSFVKGEQ---EE t81 FLQQ--MPAVPVSVCVYSHMVKKFVPQ-SHEKRGYTWKAKE-DHLVPISYCKGEH---EE t21 
FLQQ--MPAVPVSVCVHSHMVKKFVPQ-SHEKRGVTWKAKE-DHLVPISLCKGEH---EE t80 RLQQ--MPAMPVTICVSSYMVKKSVIQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DE t14 RLQQ--MPAMPVTICVSSYMVKKSVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DE t85 RLQH--MPAMPVTICVFSYMVKKAVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DE t62 RLQQ--MPAMPVTICVFSYMVKKSVPQ-SHVKRGYTWRAKE-DHLIPVSFCKGEL---EE t19 RLQQ--MPAMPVTICVFSYMVKKLVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---EE t77 RLQQ--MPAMPVTICVFSYMVKKSVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---EE t88 GMQS--MPLMQNAVCVWSKMVRKVQPD-GQDKREQTWMAKD-DTLCPPSMKRGEK---TA t37 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AA t35 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AA t48 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AA t55 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AA t46 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DALCPPCKEQGEK---AA t67 GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWVAKD-DTLCPPCKEQGEK---AA t57 GMQK--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWVAKD-DTLCPPCKDQGEK---AA t56 GTQV--MPLMQNLVCVWSKMVRKCMID-GQEKREQTWMAKD-DKLCPPSQEQGEK---AA t13 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKD-DTLCPQSRDQGEK---HH t5 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKD-DTLCPQSRDQGEK---HH t38 DMLN--MPLTQNAMCVESEMVRKSQPD-GQDKRGYTWVAKE-DDLCPQSGDQGEK---YA t33 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YA t100 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSEDQGEK---YA t34 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YA t84 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YA t92 DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YA t3 DMLI--MPLVQNAMCVKSEMVRKCQPD-GPDKRGYTWMAKD-DTLCPVSAGQGEK---YA t22 DMLI--MPLVQNAMCVNPEMVRKCQPD-GQDKRGYTWMAKN-DTLCPVSAGQGEC---YA t64 DMLK--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YA t18 DMLK--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YA t68 DMLK--MPLRQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YA t28 DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGCTWMAKN-DTLCPQSGNQGEK---FA t82 
DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGYTWMAKH-DTLCPQSGNQGEK---YA t41 DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGYTWMAKH-DTLCPQSGNQGEK---YA t71 DMLN--MPLHQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSPNQGEK---YA t94 DMLN--MPLHQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLYPQSANQGEK---YA t99 EMLN--MPLVQNAMCVPSEMVRKCQPD-GQDKRGYTWMAKD-DPLCPQSANVGEK---YA t40 EMLN--MPQVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANVGEK---YA t90 EMLN--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YA t4 EMLN--MPLIQNAMCVQSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQLHNQAEK---YA t36 NGQA--MNIVPYWVCVASGVVRKTHKD-SVDKRGQTWTAKS-DFLCPLAVDQGEP---GA t87 NGQA--MNIVPYWVCVGSGVVRKTHKD-SVDKRGQTWTAKS-DFLCPLAMDQGEI---GA t89 NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AA t39 NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AA t8 NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AA t6 NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AA t58 NGQA--MNIVPYWVCVVSGVVRKTHKD-SVEKRGQTWTAKS-DFLCPIAVNQGEP---GA t54 NGQA--MNIVPYWVCVVSGVVRKTHKD-SVEKRGQTWTAKS-DFLCPIAVNQGEP---GA t65 DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPAAVNEGEH---GA t29 DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GA t98 DPQM--MKIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GA t83 DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GA t95 DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GA t20 DPQC--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GA t69 VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GA t2 VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GA t10 VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GA t31 TPQM--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKA-DFLCPPAVSEGEH---GA t15 TPQM--MNIAPYWVCVCSGVVKKTQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GA t63 TPQK--MNIAPYWVCVCSGVVKKAQLD-SVEKRGKTWVAKS-DFLCPPAVSEGEH---GP t50 TPQK--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GP t25 TPQK--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GP t51 
TPQM--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAISEGEH---GA t9 DNQA--MVILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKV-DFLCPPAFNQGEH---RA t47 DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKM-DFLCPPAFNQGEH---RA t60 DNQA--MNILPYVVCVCSGVVRKDHPDTSIHKRGRTWLAKV-DFLCPPAFNQGEH---RA t30 DNQA--MNILPYVVCVCSGVVRKEHPDTSISKRGRTWLAKI-DFLCPPAFNQGEH---RA t70 DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HA t91 DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HA t49 DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HA t52 DNQA--MTILPHWVCVVSGVVRKEHPDTSINKRGSTWLAKV-DFLCPPAFNQGEH---HA t43 DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---ER t1 DQQE--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---EA t86 DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---GA t72 DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFKQGEH---EA t96 DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---EA t44 DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFEQGEH---EA t45 DQEA--MDILPYWVCVMSGVVRKEQPD-SVNKRGRTWLAKK-DFLVPPAFNQGEH---EA t74 DQEA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKK-DFLVPPAFNQGEH---EA t26 DQQP--MNIIPYWVCVTSGVVHKEQPD-SVNKRGRTWTAKN-DFLCPDAFNQGEH---GA t61 DQQP--MNIIPYWVCVTSGVVRKEQPD-SVNKRGHTWTAKN-DFLCPPAYNQGEH---GA t97 DQQP--MNIIPYWVCVTSGVVRKEQPD-SVNKRGRTWSAKN-DFLCPPAFNQGEH---GA t17 NQQS--MNIIPYWVCVHSGVVQKEQPD-SVHKRGRTWTAKN-DFLCPPAFDQGEH---GA t11 NQQS--MNIIPYWVCVHSGVVQKEQPD-SVHKRGRTWTAKN-DFLCPPAFDQGEH---GA t23 NGQS--MHIGPYWVCVSSEVVKKSQPD-SVDKRGRTWVAKN-EFLCPPDHMQGEHSIEGA t76 ALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK---------------------------- t53 ALDLYTHRPVPDAQCFVSRVARNIPEH-SPCK---------------------------- t16 ALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK---------------------------- t42 ALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK---------------------------- t24 ALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK---------------------------- t73 EPGGVTQ-----------------RCIENIAKLLYIKDV-LCNEVLNGLQWQLCWSV-GD t66 EPGGVTQ-----------------RCIENIAKLLYIKDV-LCNEVLNGLQWQLCWSV-GD t32 
EPGGVTQ-----------------RCIENIAKLLYIKDV-LCNDVLNLLVWQLCWSV-GD t75 EPGGVRR-----------------RIVENIVKLLYIKDV-LCNQVLNLLQWQLCWSV-GD t79 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNMVLSLLVWQLC--H-GD t27 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVWQLC--H-GD t93 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVWQLC--H-GD t59 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIDLSLLVWQLC--H-GD t7 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVWQLC--H-GD t78 EPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVWQLC--H-GD t12 EPEGPNN-----------------RVVHNIVKLLYTKDV-DCNTVLSLLIWQLC--H-GD t81 EPEGAQY-----------------RVVQNIVKLLYTKDV-DCNVVLSLLVWQLC--H-GD t21 EPEGAQY-----------------RCVQNIVKLLYTKDV-DCNVVLSLLVWQLC--H-GD t80 EPDAAQQ-----------------RIVQNIVKLLYTKDV-SCNVVLSMLIWQIC--H-GD t14 EPDAAQQ-----------------RIVQNIVKLLYTKDV-SCNVVLSMLIWQIC--H-GD t85 EPDGAQQ-----------------RVVQNIVKLLYTKDV-SCNKVLSMLIWQIC--H-GD t62 EPEGAQH-----------------RMVQNIVKLLYTKDV-SCNVVLSMLIWQIC--H-GD t19 EPEGAKQ-----------------RMVQNIVKLLYTKDV-SCNVVLSMLIWQIC--H-GD t77 EPEGAQQ-----------------RIVQNIVKLLYTKDI-SCNVVLSMLIWQIC--H-GD t88 EPTQWMG-----------------TVTVNKIKLLYCKDC-SCNEVMKILSWWLCNSV-GD t37 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t35 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t48 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t55 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t46 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t67 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t57 EPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAWWLCDSV-GD t56 EPTQWED-----------------ICTANVIKLLYCKDC-SCNEVLRVLSWWLCDSV-GD t13 EPHHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t5 EPHHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t38 EPQHIRN-----------------QFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t33 EPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t100 
EPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t34 EPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t84 EPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t92 EPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t3 EPHQIKN-----------------KFSVNVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t22 EPHQTCN-----------------KFRVNVIKLLYCKDC-SCNKVLKILKWQLCESV-DD t64 EPHVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILVWQLCESV-DD t18 EPHVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILIWQLCESV-DD t68 EPVVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILVWQLCESV-DD t28 EPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKMLKILVWQLCESV-DD t82 EPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t41 EPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t71 EPHQIRN-----------------KISVNVIKLLYCKDC-SCNQVLKILVWQLCESV-DD t94 EPHQIRN-----------------KISVNVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t99 EPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVWQLCDSVQDD t40 EPHQIRN-----------------KVSTNVIKLLYCKDC-SCNKVLKILVWQLCDSV-DD t90 EPHQIRN-----------------KVSTNVIKLLYCKDC-SCNKVLKILVWQLCDSV-DD t4 EPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVWQLCESV-DD t36 EQKPAVGEENPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t87 EQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t89 EQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t39 EQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t8 EQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t6 EQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t58 EQKPAVGA-NPIVKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t54 EQKPAVGA-DPIVKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKWQLCDSH-GD t65 EQRP-FGTQNPVCKPWRFVIMNTQTFAKNIIRLLYIKDI-SCNQVLQLLDWQLCDSH-GD t29 EQRPVFGTHNPVCKPWRFVIMHTQTFSKNIIRLLYIKDI-SCNQVLQLLDWQLCDSH-GD t98 EQRPVFGTQNPVCKPWRFVIMNTQTFSKNIIRLLYIKDI-SCNQVLQLLDWQLCDSH-GD t83 EQRPVFGTQNPVCRPWRFVIMNMQTFSKNIIRLLYIKDI-SCNQVLQLLDWQLCDSH-GD t95 
EQRPVFGGQNPACKPWHFVIMNRQTFAKNMIRLLYIKDI-SCNEVLQLLDWQLCDSH-GD t20 EQRPVFGGQNPACQPWHFVIMNTQTFAKNMIRLLYIKDI-SCNQVLQLLDWQLCDSH-GD t69 EQRPVFGTQNPVCKPWRFVIMNTETFAKNIIRLLYVKDV-SCNQVLQLLDWQLCDSH-GD t2 EQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV-SCNQVLQLLDWQLCDSH-GD t10 EQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV-SCNQVLQLLDWQLCDSH-GD t31 EQRPVFGMQNPACKPWRFVIMNTQTFAKNIIRLLYIKDV-SCNAVLQLLDWQLCYSH-GD t15 EQRPVFGMRNPICKPWRFVIMNTQTFAKNIIRLLYVKDV-SCNAVLQLLDWQLCYSH-GD t63 EQRPVFGVQNPVCKPWRFVIMNTQTFAANIIRLLYVKDV-SCNAVLQLLDWQLCYSH-ED t50 EQRPVFGVQNPVCKPWRFVIMNTQT-AKNIIRLLYVKDV-SCNAVLQLLDWQLCYSH-GD t25 EQRPVFGMQNPVCKPWRFVIMNTQTFAKNIIRLLYVKDV-SCNAVLQLLDWQLCYSH-GD t51 EQRPVFGMQNPVCRPWRFVIMNTQTFAKNVIRLLYVKDV-SCNKVLQLLDWQLCYSH-GD t9 EQKPASKFEHPVCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLNWQLCTSH-GD t47 EQKPALKFKHPVCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLKWQLCTSH-GD t60 EQKPAFKFKHPVCGPWEFVIVNYQTEAKDIILLLYIKDV-GCNCVLELLEWQLCTSH-GD t30 EQKPAFKFKHPCCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLKWQLCTSH-GD t70 EQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV-GCNCVLELLAWQLCTSH-GD t91 EQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV-GCNCVLELLAWQLCTSH-GD t49 EQKPVFGFKHPHCGPWEFVICNYQTMAKDIILLLYIKDV-GCNCVLELLAWQLCTSH-GD t52 EQKPVFGFKHPHVGPWEFVICNYQTSAHDIILLLYIKDV-GCNCVLELLVWQLCTSH-GD t43 EQRPVYGFKHPGCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNWHLCDSH-GD t1 EQRPVYGFVHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t86 EQRPVYGFQHPRCRPWQFVIANYQTFAKNIIMLLYVKDV-SCNRVLQLLNWQLCDSH-GD t72 EQRPVYGFQHPRCRPWQFVIANYQSSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t96 EQRPVYGFQHPRCRPWQFVIGNYQTSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t44 EQCPVHGFKHPRCRPWQFVIADYQTSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t45 EQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t74 EQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNWQLCDSH-GD t26 EQKPTHGFEHPRCRPWQFVIPVYQTGAKNIILLLYIKDV-SVNGVLQLLIWQLCDSH-GD t61 EQKPVHGFEHPRCRPWQFVISVYRTGAKNINLLLYIKDV-SCNGVLQLLNWQLCDSH-GD t97 EQKPVHEFQHPRCRPWQFVISVYRTGAKNINLLLYIKDV-SCNGVLQLLNWQLCDSH-GD t17 
EQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV-SCNGVLQLLNWQLCDSH-GD t11 EQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV-SCNGVLQLLNWQLCDSH-GD t23 EQKPFFGFAGPFPQPWQFVIVNPQTWAHNIIRLLYCKDV-SCNCVLTCLNWVLCDSH-GD t76 -P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQVPLSKLK-AP t53 -P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQVPLSKLK-AP t16 -P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQVPLSKLK-AP t42 -P---CS-----------------VEEVAYCKNCDSKDVRAVQNAYQDLAVPLSKLK-AP t24 -P---CS-----------------VEEVCYCKNCDSKDVRATQNAYQELAIPLSKLK-AP t73 WESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEG t66 WESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEG t32 WESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEG t75 WESLVPQACWGAKKDLAVCAWKMELVPGL---NRNNENLAKIIYFGPDGH-------DEG t79 WEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t27 WEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t93 WEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t59 WEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t7 WEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t78 WEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEG t12 WEQLVPQACAGARSDLAVCAWKRELVPGL---NHNNENLAKVIYFGPDGH-------DEG t81 WEPLIPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEG t21 WEPLVPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEG t80 WEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEG t14 WEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEG t85 WEPQIPKACNAARKDLAVCAWKRELVPGL---NHNNEDLAKIMYFGPDGH-------DEG t62 WEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEG t19 WEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEG t77 WEPQLPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEG t88 WQTLMSQACITADPNPPVCVWKRELVPGL---NRSVENLAKIIYFCPDEH-------DER t37 WQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t35 WQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t48 
WQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t55 WQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t46 WQSLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t67 WQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t57 WQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t56 WRTLMSDACALANPNPPVCVWKRELVPGL---NRNVENLAKIIYFCPDEH-------DER t13 WQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEG t5 WQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEG t38 WQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t33 WQTLASDACVSAEPNTPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t100 WQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t34 WQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t84 WQTLASDACVSAEHNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t92 WQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEG t3 WVTLASDACLSAEPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEG t22 WQTLASDACILAKPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEG t64 WQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t18 WQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t68 WQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t28 WQTLASDPCVSAEPNSPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t82 WQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEG t41 WQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEG t71 WQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t94 WQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t99 WQPLTSDPCVNAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t40 WQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t90 WQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEG t4 WQTLTSDPCISAEPNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEG t36 WQSLYADSCPIA-VNAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEG t87 WQSLYADSCPIA-INAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEG t89 
WQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEG t39 WQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEG t8 WQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEG t6 WQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEG t58 WQSLFADSCPTA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEG t54 WQSLFADSCPIA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEG t65 WQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEG t29 WQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t98 WQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t83 WQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t95 WQSLIADSCPNA-SNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEG t20 WQSLIADSCPNA-VNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEG t69 WQSLVADSCPNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEG t2 WQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEG t10 WQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEG t31 WQSLIADSCPTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t15 WQSLIADSCSTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t63 WQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEG t50 WQSLIADACSTA-VNIAVCCWKRELMPGL---NHSVEHLAKSVYFKPDGE-------DEG t25 WQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEG t51 WQSLIADACPAA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEG t9 WQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEG t47 WQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCDRLAKHIYFQPDGE-------DEG t60 WQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEG t30 WQSLVADSCVWA-HNVAVCEWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEG t70 WQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEG t91 WQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEG t49 WQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEG t52 WQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEG t43 WQSLVADSCCWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t1 
WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t86 WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t72 WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t96 WQSLVADSCMWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t44 WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t45 WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t74 WQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEG t26 WQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEG t61 WQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEG t97 WQSLVADSCAWA-VNLAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEG t17 WQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEG t11 WQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEG t23 WHSLIADACPCA-HNVAVCGWKRELVPGL---NHSNEHMAKTIYFEPDGH-------DEG t76 WLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEEN t53 WLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEEN t16 WLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEEN t42 WLSMGHCECWEEDINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDVELCEES t24 WLSMGHCECWEEGINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDIELCEEN t73 PMQAKIVTLHW-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLAR t66 PMQAKIVTLHW-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLAR t32 PMQAKIVTLHW-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLAR t75 NMQAKIVTLHW-EMDVSHRG-SSVDDNVV-VVMLAFQASFCDPWGHYIQGLGD-QYKLAR t79 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFNINSIDPWRHYILGLND-QIQLTR t27 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTR t93 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTR t59 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTR t7 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTR t78 PMGQKIVDLHW-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTR t12 PMGRKIVDLHW-EMEASHRD-SSVRDNML-VIMLPFLVNNLDPWRHYILGLND-QIQLTR t81 PMGRKIVDLHW-EMEASHRD-SSVRDNYI-VIMLPFKINSRDPWRHYILGIND-QVCLNR t21 
PMGRKIIQLHW-EMEASHRD-SSVRDNII-VIMLPFKINSRDPWRHYILGLND-QVCLTR t80 PMGRKIVTLHV-EMEVSHRE-SSVKDNMIRVIMLPFSNSSVDPWRHVILGLND-QIKLTR t14 PMGRKIVTLHV-EMEVSHRE-SSVKDNMIRVIMLPFPSSSVDPWRHVILGLND-QIKLTR t85 PMGRKIVNLHV-EMEVSHRE-SSVKDNMIRVVMLPFDNNSWDPWRHVILGLND-QIKLTR t62 PMGEKIVNLHV-EMEASHRE-SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-KIKLTR t19 PMGEKIVNLHV-EMEASHRE-SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-QIKLTR t77 PMGEKIVNLHV-EMEASHRE-SSVKDNMIRVIMLPFGENSWDPWRHVILGLND-QIKLTR t88 KMWGKIFALEW-EMDISHRH-SSVDDNHC-VEMLPFMCQRVDPWGHYVQILAD-RQDLAR t37 RMWGKIFALEW-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTR t35 RMWGKIFALEW-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTR t48 RMWGKIFALEW-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTR t55 RMWGKIFALEW-EMDISHRH-SSVDDNQC-VELLPYNCQRMDPWGHYVQILAD-RLDLTR t46 RMWGKIFALEW-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQVLAD-RLDLTR t67 RMWGKIFALEW-EMDISHRM-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLVLTR t57 RMWGKIFALEW-EMDISHRV-SSVDDNQC-VELLPYLCQRMDPWGHYVQILAD-RLNLTR t56 KMWGKIFCLEW-EMDISHRH-SSVDDNVC-VEMLPFVCQKMDPWGHYVQILAD-RLDLTR t13 KMMNKIFALEW-EMDMSHRV-SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTR t5 KMMNKIFALEW-EMDMSHRV-SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTR t38 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTR t33 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTR t100 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTR t34 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTR t84 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTR t92 KMMCKIFALEW-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTR t3 KMMPKIFKLAW-EMDMSHRA-SSVDDNYV-IEMLPFVCHRVGPWGHYAQVQAD-QQNLTR t22 KMMSKIFNLEW-EMDMSHRT-SSVDDNYV-IDMLPFICHRVGPWGHYAQVLAD-QINLTR t64 KMMSKIFALEW-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTR t18 KMMFKIFALEW-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTR t68 KMMSKIFALEW-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTR t28 KMMSKIFALEW-EMDMSHRI-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLSD-QIVLTR t82 
KMMSKIYALEW-EMDMSHRV-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTR t41 KMMSKIYALEW-EMDMSHRV-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTR t71 KMMRKIFALEW-EMDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTR t94 KMMRKIFALEW-EMDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-KLVLTR t99 KMMNKIFALEW-EMDMSHRDSSSVDDNYI-IEMLPFVCHRVGPWGHYAQQLAD-RLVLTR t40 KMMRKIFALEW-EMDMSHRN-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTR t90 KMMRKIFALEW-EMDMSHRN-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTR t4 KMMGKIFALEW-EHDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTR t36 KVMLKIFGLDWCEVERSHEH-SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIR t87 KVMLKIFGLDWCEVERSHEH-SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIR t89 KMMLKIFGLEWCEVERSHEH-SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIR t39 KMMLKIFGLEWCEVERSHEH-SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIR t8 KMMLKIFGLEWCEVERSHEH-SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIR t6 KMMLKIFGLEWCEVERSHER-SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIR t58 KMMLKIFGLDWCEVERSHEH-SSVDDNYN-VNMLPFPNSRQDPVGHYVQDLED-LLRLIR t54 KMMLKIFGLDWCEVERSHEH-SSVDDNYN-VNMLPFPNSRKDPVGHYVQDLED-LLRLIR t65 QMTMKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIR t29 QMSQKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELEDSALRLIR t98 QMTMKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIR t83 QMTMKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIR t95 QMIMKIFNLDWCEVEKSHEK-SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIR t20 QMIMKIFNLDWCEVEKSHEK-SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIR t69 QMIVKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIR t2 QMMVKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIR t10 QMMVKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIR t31 QMIMKIFNLDWCEVEKSHEK-SSVDDNYH-VNMLPFAQSNRDPVGHYVQGLED-ALRLIR t15 QMIVKIFHLDWCEVEKSHEK-SSVDDNYH-VNMLPFAQSNGDPVGHYVQGLED-ALRLIR t63 QMFMKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIR t50 QMIMKIFNLDWCEVEKSHEK-SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIR t25 QMIMKIFNLDWCEVEKSHEK-SSVDDNYD-VNMLPFVQSNGDPVGHYVQGLED-ALLLIR t51 
QMIMKIFGLDWCEVEKSHEE-SSVDDNYN-VNMLPFVSSNGDPVGHYVQGLED-ALRLIR t9 KMVLKIFGLDWCEMEKSHQR-SGVDDNYQ-VNMLPFNHSKNNPVGHYVQGLED-ALNLIR t47 KMILKIFGLDWCEMEKSHQR-SGVDDNVQ-VNMLPFNHSKHNPVGHYVQGLED-ELNLIR t60 KMILKIFGLDWCEMEKSHQR-SGVDDNYQ-VNMLPFNHSKHNPVGHYVQGLED-ALNLIR t30 KMILKIFGLDWCEMEKSHQR-SGVDDNYQ-VNMLPFKHSKHNPVGHYVQGLED-ALNLIR t70 KMILKIFGLDWCEVERSHQR-SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIR t91 KMILKIFGLDWCEVERSHQR-SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIR t49 KMIIKIFGLDWCEVEKSHQR-SGVDDNYK-VNMLPFSHSNHNPVGHYVQGLGD-ALRLIR t52 KMILKIFELNWCEVEKSHQH-SGVDDNYQ-VNMLPFDHCKHNPVGHYVQGLED-ALGLIR t43 KMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFLHSRQNPVGHYVQGLED-PMHLIR t1 KMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFMQSRENPVGHYVQGLED-PMHLIR t86 KMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFIHSRDNPVGHYVQGLED-PMHLIR t72 QMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFIHSRENPVGHYVQGLED-PMHLIR t96 KMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFIRSRENPVGHYVQGLED-PMHLIR t44 KMVLKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFMHSRENPVGHYVQGLED-PMHLIR t45 KMVHKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIR t74 KMVHKIFELDWVEMEKSHQQ-SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIR t26 KMVIKIFGLDWCEIEKSHQQ-SSVDDNYL-VNMLPFTHSRETPVGHYVQGLED-PMHLMR t61 KMVLKIFGLDWCEIEKSHQQ-SSVDDNYL-VNMLPFAHSRQTPVGHYVQGLED-PMHLMR t97 KMNLKIFGLDWCEIEKSHQQ-SSVDDNYL-VNMLPFTHSRETPVGHYVQDLED-PMHLMR t17 KMVIKIFGLDWCEMEKAHQQ-SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIR t11 KMVIKIFGLDWCEMEKAHQQ-SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIR t23 KMILNIFGLDWCEVETSHQE-SSSDDNHL-VNMLPFGVSRDDPVGHYMLGLED-AIRLYR t76 DLNCDDFELGW-NLKH-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQ t53 DLNCDDFELGW-NLKP-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQ t16 DLNCDDFELGW-NLKP-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQ t42 DLTVDDFELGW-VLKP-------EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQ t24 DLTCDDFELGW-VLKP-------EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQ t73 PNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK t66 PNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK t32 
PNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK t75 PNT---ARQLSILV---VMHFSGGFRDKPI-ERSQLHC-TYSEN----KKDQKVVIRAMK t79 PNT---ARRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t27 PNT---ARRLSILH---VMHYSGAFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t93 PNT---ARRLSILH---VMHYSGAFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t59 PNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t7 PNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t78 PNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK t12 PNT---ARRLSILM---VMHYSGEFHDKSP-ERSQLHFDSYSEH----KNDHKCVILASK t81 PET---ARRLSILA---VMHYSGEFHDKSP-ERSHLHF-RYSEM----KNDVKCVI---K t21 PET---ARRLSILV---VMHYSGEFHDKSP-ERSHLHY-RYSEV----KNDVKCVILASK t80 PNT---AHRLSILV---VTHYSGGFHDKSA-ERSQHHF-IYNEV----KDDHKCVILASK t14 PNT---AHRLSILV---VTHYSGGFHDKSA-ERSQHHF-IYNEV----KDDHKCVILASK t85 PNT---AHRLSILV---VMHYSGGFHDKSS-ERSQHHF-IYNEV----KNDHKCVILASK t62 PNT---AHRLSILV---VMHYSGGFHDKSA-ERSQHHF-IYNEI----KNDHKCVILASK t19 PNT---AHRLSILV---VMHYSGGFHDKSA-ERSQHHF-IYNEI----KNDHKCVILASK t77 PNT---AHRLSILV---VMHYSGGFHDNSA-ERSQHHF-IYNEV----KNDHKCVILASK t88 PVT---LQALSILP---CPHASGKEQDGAV-ERSQHYV-VYAEL----QVDHKCTIAAHK t37 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK t35 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK t48 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK t55 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK t46 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK t67 PVT---LHGLSILP---CPHASGKEQDGAE-ERSQHYG-VYQEL----QNDAKSTIDANK t57 PVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDAKSTIDANK t56 PVT---LQGLGILV---CPHASGKEQDGAM-ERSQHYV-VYAEL----QNDHKNTINANK t13 PHT---LRDLYILAC--CPVASGKEQDGNK-ERSQHYT-IYAEI----QNDHKNPISANN t5 PHT---LRDLYILAC--CPVASGKEQDGNK-ERSQHYT-IYAEI----QNDHKNPISANN t38 PHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH t33 PHT---LRELYILA---CPVASGKEQDGAK-ERSQCYT-IYAEL----QNDHKSLISANH t100 
PHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH t34 PHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH t84 PHT---LRELYILA---CPVASGKEQDGAK-ERSQCYT-IYAEL----QNDHKSLIAANH t92 PHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH t3 PHT---LADLFILS---CPVASGKEQDGAT-ERSQYYV-VYAEL----QNDHKSPISANK t22 PHT---LRDLFILA---CPVASGKEQDGAT-ERSQHYV-VYAEL----QNDHKSPISANK t64 PHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK t18 PHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK t68 PHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK t28 PHT---LRDLFILA---CRVASGGEVDGNT-ERSQHYI-VYAEL----QNDHKSPISANK t82 PHT---LRDLFILA---CPVASGGEQDGNA-ERSQHYI-VYAEL----VNDHKSPISANK t41 PHT---LRDLFILA---CPVASGGEQDGNA-ERSQHYI-VYAEL----VNDHKSPISANK t71 PHT---LRDLFILA---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK t94 PHT---LRDLFILA---CYVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPILANK t99 PHT---LRDIFILA---CPVASGGEQDGNT-ERSQHYI-VYAEL----QDDHKSPISANK t40 PHT---LRNIFILA---CPVASGGEQDENT-ERSQHYI-VYAEL----QNDHKSPIHANK t90 PHT---LRNIFILA---CPVASGGEQDENT-ERSQHYI-VYAEL----QNDHKSPINANK t4 PHT---LRDIFILA---CPVASGGEQDAVT-ERSQHYI-VYAEL----QNDHKSPISASK t36 PGT---ARSLTILF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEH----KQDHKSAIIANK t87 PGT---ARSLTILF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEH----KQDHKSAIIANK t89 PGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK t39 PGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK t8 PGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK t6 PGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-IYCEL----KQDHKSAIIANK t58 PGT---ARSLTVLF-YGCQYYSGQFQDCEI-ERSQLYN-VYCEL----KQDHKSAIMANK t54 PGT---ARSLTVLF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEL----KQDHKSAIMANK t65 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDYKSAIIANQ t29 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDYKSAIIANQ t98 PGT---ARALVILF-YGCQYYSGKFQDSEMFERSQLYR-VYCEI----KKDYKSAIIANQ t83 PGT---ARALVILF-YGCQYYSGKFQDSDMFERSQLYR-VYCEI----KKDYKSAIIANQ t95 
PGT---ARALIILF-YGCQYYSGKFQDSELFERSQLYR-MYCEI----KKDHKSAIIANQ t20 PGT---ARALIILF-YGCQYYSGKFQDSELFERSQLYR-MYCEI----KKDHKSAIIANQ t69 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ t2 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ t10 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ t31 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ t15 PGT---ARALVILS-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIVANQ t63 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ t50 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIMANQ t25 PGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSTIIANQ t51 PGT---ARALVILF-YGCQFYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ t9 PGT---ARALNILF-YGCEVYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIIAHK t47 PGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMACK t60 PGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMAYK t30 PGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMAYK t70 PGT---ARALNILF-YGCQYCSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAILAHK t91 PGT---ARALNILF-YGCQYCSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAILAHK t49 PGT---ARALNILF-YGCEYYSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAIMAHK t52 PGT---ARALNILF-YGCQYYSGEFQDSEA-ERSWIYN-VYCEI----KKDHKSGIMAHK t43 PGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVIAHK t1 PGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK t86 PGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYP-VYCEM----KKDHKSAVVAHK t72 PGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK t96 PGT---ARKLNILF-YGCEYYSGQFPDAEA-ERSWIYD-VYCEM----KKDHKSAVYAHK t44 PGT---ARKLNILF-YGCEYYSGQFPDCEA-ERSWIYD-VYCEM----KKDHKSAVVAHK t45 PGT---ARKLNILF-YGCQYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK t74 PGT---ARKLNILF-YGCQYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK t26 PGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKNAIVAVK t61 PGT---ARALNILF-YGCEYYSGHFPDGEA-ERSWIYD-MYCEL----KKDHKSAIVAVK t97 PGT---ARALNILF-YGCEYYSGHFPDGEP-ERSWIYD-MYCEL----KKDHKSAIVAVK t17 
PGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKSAIVAVK t11 PGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKSAIVAVK t23 PGT---ARALNILF---VNYYSGDFQDPEL-ERSQLYN-VYCEQ----KQDHRNAIRASK t76 PCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV t53 PCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV t16 PCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV t42 PCHAAHAQDIHTDQ---DRYDVRTFLGHSI-KCHMCNQ-ACHEENIVWRDDLKDPITTEV t24 PCYAAHAQDIHTDQ---DRYDIRTFLEHSI-RCHLCNQ-ACHEENIVWRDDLKDPITTEV t73 -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-ISDATR t66 -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-ISDATR t32 -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-LSDATR t75 -RNKGCDWHAGKD-MAGGPK-IIYVGMLFY-------AESNWAKQKAHFVVFL-ISDATR t79 -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t27 -VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t93 -VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t59 -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t7 -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t78 -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVR t12 -HNKGLDWATGKDDMKGGAK-IMMSGALFY-------AEQNWIRARNHFVIFL-ICDAVR t81 -SNKGLDWTAGKDGMKGEAK-AMMNGALFY-------AEQNWFRKKNHFVIFV-FCDAVR t21 -SNKGLDWTAGKDGMKGENK-AMMNGTLFY-------AEQNWFRQKNHFVIFV-ICDAVR t80 -CNKGLDWAAGKD-MKGGAK-VMMNGALFY-------AERNWIRQKHHFVIFV-VCDAVR t14 -CNKGLDWAAGKD-MKGGAK-IMMNGALFY-------AERNWIRQKHHFVIFV-VCDAVR t85 -SNKGLDWAAGKD-MKGGAK-MMANGALFY-------EERNWIRQKNHFVIFV-VCDAVR t62 -ANKPLDWAAGKD-MKGGAK-MMANGALFY-------AERNWIRQKNHFVIFI-ICDAVR t19 -ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNHFVIFV-ICDAVR t77 -ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNHFVIFV-ICDAVR t88 YQDKMKDWDTGKN-MDEGAK-MYVEIHLFY-------AENNIRRQNDHSVIFW-ISDAKK t37 NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQK t35 NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQK t48 
NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLIKQNEHSVTFV-ISDAQK t55 NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQK t46 NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEHSVTFV-ISDAQK t67 NPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKLNEHSVTFV-ISDAQK t57 NPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEHSVTFV-ISDAQK t56 VPRKMLDWDTGKN-MDKGAKGMYHEIVLFF-------AENNVKKQVEHSVIFI-IADAQK t13 QPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDHSVMFI-ISDAQN t5 QPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDHSVMFI-ISDAQN t38 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t33 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t100 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t34 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t84 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t92 VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQN t3 HPDKMLDWDTGKS-MDRGAA-MCREIKLFY-------AEPNVWKQNDHSVVFI-ISDAVN t22 HPNKLLDWDTGKT-MDRGAA-MCHEIKLFH-------AEPNVWRQNDHSVMFI-ISDASN t64 HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQN t18 HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQN t68 HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQN t28 VLNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDHSVMFI-ISDAQN t82 VLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDHSVMFI-ISDAQN t41 VLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDHSVMFI-ISDAQN t71 HMNKMLDWDTGKT-MDFEAA-MCREIKLFY-------AETNVWKINDHSVMFI-VSDAQN t94 VVNKMLDWDMGKT-MDFEAA-MCREIKLFY-------AETNVWKQNNHSVMFI-VSDAHN t99 HMRKMLDWDTGKT-MDREAA-VCREIKLFY-------AETNCWKQNDHSVMFI-VSDAQN t40 VMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDHSVMFIVISDAQN t90 VMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDHSVMFIVISDAQN t4 HMNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDHSVMFI-ISDAQN t36 QEQKGMDWNTGKE-MEQGPK-IILHGSLFF-------AEPNIVRQPGVSHIFI-GNDARR t87 QEQKGMDWNTGKE-MEQGPK-VILHGTLFF-------AESNIVRQPGVSHIFI-GNDARR t89 
QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARR t39 QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARR t8 QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARR t6 QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFI-ENDARR t58 QEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGVSHIFV-GNDARR t54 QEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGVSHIFV-GNDARR t65 QEHKGMEWDTGKE-MQQGPK-VVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-GNDARR t29 QDHKGMEWDTGKE-MQQGPK-VVVHLALFYMPSNVLAAEPNIIGQPQVSHWFV-GNDARR t98 QEHKGMEWDTGKE-MQQGPK-IVVHLGLFYMPSNVLAAEPNIIGQPQVSHVFV-ANDARR t83 QEHKGMEWDTGKE-MQQGPQ-IEVHLALFYMPSNVLPAEPNIIGQPQVSHVFV-GNDARR t95 QEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQVSHVFV-GNDARR t20 QEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQVSHVFV-GDDARR t69 QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDGRR t2 QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDARR t10 QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDARR t31 HEVKGMDWNTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQVSHVFV-GNDARR t15 QEVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQVSHVFV-GNDARR t63 QGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDARR t50 QGVKGMDWDTGKE-MQQGPK-IVMHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDARR t25 QGVKGMDWDTGKE-MQQGPK-IVVHLTLFYAPSNVLAAEPNIFGQPQVSHVFV-DNDARR t51 QGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDIRR t9 YEHKGMDWDTGKE-AQQGPE-VVHHNLLFF-------AEPNIHGQPGVGHIFV-GNDARR t47 YRHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGVGHIFV-GNDARR t60 FEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGVGHIFV-GNDARR t30 YEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIHGQPGVGHIFV-GNDARR t70 YKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRVGHIFV-GNDAHR t91 YKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRVGHIFV-GNDAHR t49 YEHKGMDWDTGKE-MEQGPK-VVHHNILFY-------AEPNIHGQPRVGHIFV-GNDAVR t52 YDHKGVDWDTGKE-MEQGPK-VLHHNLLFY-------AEPNIHGQPTVGHIFV-GNDARR t43 HEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t1 
VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t86 AEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t72 VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t96 VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t44 HEHKGMDWDTGKE-MNQGPK-VVMHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t45 HQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t74 HQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATR t26 HEHKGMDWDTGKE-MNHGPK-VIVHGVLFH-------AEPNIGGQPGVSHVFV-GNDATR t61 HCHKGMDWDTGKE-MNHGPK-VIIHGLLFH-------AEPNIGGQPGVSHVFV-GNDATR t97 HCHKGMDWDTGKE-MNHGPK-VIIHGVLFH-------AEPNIGGQPGVSHVFV-GNDATR t17 HEHKGMDWDTGKE-MNQGPK-VIIHGVLFH-------AEVNIHGQPGVSHVFV-GNDATR t11 HEHKGMDWDTGKE-MNQGPK-VIIHGILFH-------AEVNIHGQPGVSHVFV-GNDATR t23 NDHKGMAWDTAKD-MEQGPK-MVEHQVLFY-------AEPNMHGQPEVIHIFI-GNDAMC t76 IADKLHEWPTNVN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIR t53 IADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIR t16 IADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIR t42 IAEKLQEWPTNQN-KENVAD----HRQLFH-------ANSSALDKHRHNVMMN-GSACIR t24 IADKLQEWPTNQN-AENVAD----HRQLFH-------ANSSCLDRHRHNVMMN-GSSCIR t73 LLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNS t66 LLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNS t32 LLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNS t75 LLIHHVGCMIEYTFMEFACHPFFAELFMEHMVIRYQYYNNVDIKIFDTCFRGAVYAEDNS t79 ILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYINNHDIKIKDTVFRLAVYAWDNF t27 ILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENF t93 ILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENF t59 ILILHHGCMTDHTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENF t7 ILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENF t78 ILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENF t12 ILIVLHGCMTAWTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTEFRLAVYAWENS t81 ILIMHSGVMIGYTFMEFACNPFFAELFMEHVMVRYQYESNHDIKIKDTCFRLAVYAWENS t21 
ILIVHSGVMTGFTFMEFACNPFFAELFMEHVYIRYQYQANHDIKIKDTCFRLAVYAWENS t80 ILIVHYGCMIGYTFMDFACNPFFAELFMDHVFIRYVYMNNHDIKIMDTCFRVAVYTWENS t14 ILIVHYRCMIGYTFMDFACNPFFAELFMDHVFIRYQYMNNHDIKIMDTCFRVAVYTWENS t85 ILIVHYGCMVPYTFMAFACNPFFAELFMEHVFIRYQYVNNHDIKIMDTCFRVAVYTWENS t62 ILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPNHDIKIMDTCFRVAVYTWENS t19 ILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPNHDIKIMDTCFRVAVYTWENS t77 ILIVHYGCMVGDTFMDFACNPFFAELFMEHVVIRYQYIPNHDIKIMDTCFRVAVYTWENS t88 RLICGMSSMCLVTFMTLACNPLFAKLFMEVVPMRYDYLTNHDIKI------MEVYAPENS t37 RLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t35 RLICGVSSMAMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t48 RLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t55 RLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t46 RLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPDNS t67 RLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t57 RLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENS t56 RLICGVSSMCLQTFMNLACNPFFAKLFMEVIPMRYDYQTNHDIKI------KEVYAPENS t13 RLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSNHSIKI------FEVYADENS t5 RLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSNHSIKI------FEVYADENS t38 KLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENS t33 RLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENS t100 RLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENS t34 RLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENS t84 RLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENS t92 RLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENS t3 RLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYDYFSNHSIKI------FEVYAEDDS t22 RLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYEYFSNHSIKI------FEVYAEENS t64 RLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAAENS t18 RLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAAENS t68 RLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAVENS t28 RLICGNSIMCGITFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYAAENS t82 
RLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYADENS t41 RLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYADENS t71 RLICGNSIMCGLTFMAMACNPHFAKLFMEGVCMRYDYSANHDIKI------FEVYADENS t94 RLICGNSIMCALTFMAMACNPHFAKLFMEAVCMRYDYSANHDIKI------VEVYADENS t99 RLICGSSIMCRLTFMAMAVNPHFAKLFMEDVCMRYDYSPNHDIKI------FEVYADENS t40 RLICGSSIMCRLTFMAMAVNPHFAKLFMEESCVRYDYSANHDIKI------FEVYANENS t90 RLICGSSIMCGLTFMAMAVNPHFAKLFMEESCVRYDYSRNHDIKI------FEVYANENS t4 RLICGSSIMCGLTFMATACNPHFAKLFMEEVCMRYDYAANHDIKI------SEVYAEENS t36 VLICGKSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMMNYDIKIYETHWRMGVYALDNS t87 VLICGLSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMVNYDIKIYETHWRMGVYALDNS t89 MLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTNYDIKIYETHWRVGVYAIDNS t39 MLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTNYDIKIYETHWRVGVYAIDNS t8 MLICGNSMMPRHRFMREACVPFFHKLFMAANQMRYDYVTNYDIKIYETHWRVGVYAIDNS t6 MLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYMTNYDIKIYETHWRVGVYAIDNS t58 MLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMSNYDIKIYETHWRPGVYALDNS t54 MLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMTNYDIKIYETHWRPGVYALDNS t65 MLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYMWNYDIKIYETHFRMGVYAVDNS t29 MLIVGVSMMPAYKFMREACVPFFRKLFMADNQVRYDYMWNYDIKIYETHFRMGVYAVDNS t98 MLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHFRMGVYAVDNS t83 MLIVGVSLMPAYKFMREACVPFFRKLFMAENQVRYDYMWNYDIKIYETHFRMGVYAVDNS t95 MLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHYRMGVYAVDNS t20 MLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHYRMGVYAVDNS t69 MLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYAWNYNIKIYETHFRDGVYAVDNS t2 MLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWNYNIKIYETHFRVGVYAVDNS t10 MLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWNYNIKIYETHFRVGVYAVDNS t31 MLIVGVSVMPSYKFMREACVPFFHKLFMADNQVRHEYMWNYDIKIYETHFRMGVYAVDNS t15 MLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYMWNYDIKIYETHFRMGVYAVDNS t63 MLIVGQSVMPPYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAIDNS t50 MLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNS t25 MLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNS t51 
MLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNS t9 KLIAGVSFMSMVKFMCEACVPFFRKLFMAVGQMRYDYVNNYDIKIWETVFRGGVYAIENS t47 KLIAGLSFMAMMKFMCEACVPFFRKLFMAVGHMRYDYVSNYDIKIWETVFRGGVYAIENS t60 KLIAGVSFMAVMKFMCEACVPFFRKLFMAVGQMRYDYMNNYDIKIWETFFRGGVYAIENS t30 HLIAGVSFMAVVKFMCEACVPFFRKLFMAVGQMRYDYVNNYDIKIWETVFRGGVYAIENS t70 KLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAIENS t91 KLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAIENS t49 NLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAMENS t52 KLIAGVSFMSVVKFMCEACVPFFRKLFMAFRQMRYDYVNNYDIKIWETVFRGGVYAIENS t43 KLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYNVNYDIKIWETHIRHGVYAVENS t1 KLITGVSVMPTYKFMCGACVQFFHKLFMAVRNMRYDYTVNYDIKIWETHIRQGVYAVENS t86 KLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHIRHGVYAVENS t72 KLITGFSIMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHIRHGVYAVENS t96 KLISGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDVKIWETHIRHGVYAVENS t44 KLITGVSVMPTYKFMCNACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHMRHGVYAVENS t45 KLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHLRHGVYAVENS t74 KLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHLRHGVYAVENS t26 KLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYASNYDIKIVETHLREGVYAVENS t61 KLIAGASVMFIVKFMCGACVPFFRKLFMALRNMRYDYSQNYDIKIVETHLREGVYAIENS t97 KLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYSQNYDIKIVETHLREGVYAVENS t17 KLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSNYDIKIIETHLRNGVYAVENS t11 KLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSNYDIKIMETHLRNGVYAVENS t23 MLIPGMSVMVHHKFMRAACMPFFNKLFMAVRYMRYDYVDNYDIKIDETRYRDGVYARRNS t76 ALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVMCNVKC t53 ALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVVCNVKC t16 ALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVVCNVKC t42 TLIIGGSIVGVNVFMMEPCQEFFTDLFMVYEGLQYAVVCAVNIIIQCLHTNEGVVCNVKC t24 TLIIGGSIVGVNVFMMGPCQEFFTDLFMVYEGLQYAVVCAINIIIQVLHTNEGVVCNVKC t73 WETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMR t66 WETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMR t32 
WETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMR t75 WETLCGY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMR t79 WETLCKC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t27 WETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t93 WETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t59 WETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t7 WETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t78 WETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPL t12 WETLCNC-EMLSGHIGAKINHNPRLFEQVC-PPCELTVSGLKGKH-VGWPYLTDITRTPL t81 WETLCNV-EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGLPYLTDITRTPL t21 WETLCNV-EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGWPYLTDITRTPL t80 WETCFNR-EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPL t14 WETCCNR-EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTEITRTPL t85 WETCCNR-EMLSGTFGAKISHNPRLPEQVI-PPCDLTVSGLKGKH-NGWPYLTDITRTPL t62 WETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPL t19 WETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPL t77 WETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPL t88 WETIKNV-EMVSGKEGAEINHTPQLPEQVI-PPCNLTVSNLKGMI-PPCPALTEITRTV- t37 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP- t35 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP- t48 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP- t55 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP- t46 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP- t67 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP- t57 WETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP- t56 WETIKNV-EMVSGKAGKQINHEPQLPEQVI-PPCSLTVSNLKGMM-PPCPAMTEITRTV- t13 WENVRNF-EMVSGRAGAHINHIPQLPEQAY-PPCKLTVSNLKGVA-ASCPAITEITRTA- t5 WENVRNF-EMVSGRAGAHINHIPQLPEQAY-PPCVLTVSNLKGVA-ASCPAITEITRTA- t38 WENIRNF-EMVSGRAGAYINHIPQLPEQAV-PPCHLTVSNLKGVA-ASCPAITEITRTA- t33 WENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA- t100 
WENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA- t34 WENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA- t84 WENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA- t92 WENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA- t3 WENICNF-EMMSGRTGAQINHIPQLPEQVC-PPCHLTVSNLKGCC-ASVPAITEITRTV- t22 WENVCNF-EMMSGRAGAQINHIPQLPEQVC-PPCVLTVSNLKGCC-ASCPANTEITRTV- t64 WENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV- t18 WENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV- t68 WENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV- t28 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV- t82 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV- t41 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV- t71 WENVRNF-EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV- t94 WENVRNF-EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV- t99 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV- t40 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV- t90 WENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV- t4 WENVRNF-EMMSGRAGAQINH-PQLPEQVV-PPCHLTVSNLKGVC-ASCPAITEITRTV- t36 WETLNVS-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t87 WETLNVS-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t89 WETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t39 WETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t8 WETLNVS-EMTSGRMGAKINHLPRLPEHVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t6 WETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPD t58 WETLNVS-EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPD t54 WETLNVS-EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPD t65 WETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPV t29 WETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPV t98 WETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCDLTVSGLKRTV-AGPPDETEITKSPV t83 WETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTI-AGAPDETEITKSPV t95 
WETLVYC-EMTSGRIGAKVNHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPI t20 WETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPI t69 WETLVYV-EMASGRIGAKINHQPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPI t2 WETLVYV-EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPI t10 WETLVYV-EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPI t31 WETLVYC-EMTSGRIGANINHLPRLPEQVV-PPCILTVSGLKRTI-AGSPDETEITKTLI t15 WETLVYC-EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLI t63 WETLVYC-EMTSGRIGAKINHQPRLPEQVV-PPCVLTVSGLKRTI-AGAPDETEITKTLI t50 WETLVYC-EMTSGRIGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLI t25 WEALVYC-EMTSGRTGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLI t51 WETLVYC-EMTSGRIGAKINHLPRLPEQVF-PPCILTVSGLKRTI-AGAPDETEITKTLI t9 WETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGLIDGTEITKTPD t47 WETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGIIDGTEITKTPE t60 WETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIEGTEITKTPD t30 WETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIDGTEITKTPD t70 WETLVLC-EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPE t91 WETLVLC-EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPE t49 WETLVLC-EMTSGRVGARMNHLPRLFEQVC-PPCILTVSGLKETV-AGVIDGTEITKTPE t52 WETLILC-EMTSGRCGAKMNHMPRLFEQVC-PPCILTVSGLKETM-AGVIDGTEITKTPD t43 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCVLTVSGLKQPM-AGYNDQTEITKTPD t1 WETLITC-EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKQPM-AGYNDETQITKTPD t86 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETQITKTPD t72 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPV-AGYNDETQITKTPD t96 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNMETQITKTPD t44 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETEITKTPD t45 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPD t74 WETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPD t26 WETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQSV-SGYNDETEITKTPD t61 WETLVVC-EMTSGRMGAKINHLPRLPEQVI-PPCILTVSGLKRAV-NGYNDETEITKTPD t97 WETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQAV-NDCNDETEITKTPD t17 
WETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPD t11 WETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPD t23 WETANVC-EMSSGRSGAKINHNPRLPEQVT-PPRSLTVSGLKDTM-PGHPDVTEITKTPV t76 KEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPL t53 KEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPL t16 KEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPL t42 KEFLQREEDMKSGLIGIICNHISRMCMHIL-VVCLLTHCGLKAME-IPFDI-TGVVRGPL t24 KEFLQREDDMKSGLIGIICNHISRMCMHIL-VHCLLTHCGLKAME-IPFDR-TGVVRGPL t73 IQIAIRVSGSE-SENVVLQYCLYYEENKSIR-----------QNNTGSADLPK------- t66 IQIAIRVSGSE-SENVVLQYCLYYEENKSIR-----------QNNTGSADLPK------- t32 IQIAIRVSGSE-SENVVLQYCLYYEENKSIR-----------QNNTGSADLPK------- t75 IQIPIRISGSE-SENVVLQYCLYYEENKSIR-----------QNNTGSAVLPK------- t79 IRIKIREPGSE-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t27 IRIKIREPGSE-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t93 IRIKIREPGSE-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t59 IGIKIREPGSQ-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t7 IGIKIREPGSQ-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t78 IGIKIREPGSQ-SENTVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t12 IRIQIREPGSE-CENSVVMYVIYYKERKSIR-----------QNNTGCAKLPQ------- t81 IRIQIRVPGSE-SENVVVMYMVYYKERKSIR-----------QNNTGCANLPQ------- t21 IRIQIRVPGSE-SENITVMYMVYYKERKSIR-----------QNNTGCANLPQ------- t80 IKIHIRVPGSQ-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t14 IKIHIRVPGSQ-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t85 IKIHIRIPGSQ-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t62 IRIHIRVPGSE-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t19 IRIHIRVPGSE-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t77 IRIHIRVPGSE-SENHVVMYCVYYKEHKSIR-----------QNNTGCANLPQ------- t88 -FIV--MYYAI-LRNKIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t37 -FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t35 -FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t48 
-FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t55 -FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t46 -FIM--MYVAM-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t67 -FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t57 -FIV--MYVAI-LRNDIMQYCFYYKENRSVR-----------LNNTGRAELPK------- t56 -III--MYYAI-LRNEIMQYCFYYKENRSVR-----------INNTGQAELPK------- t13 -VIV--MYGSI-LRNDVMQYVFYYRENRSVR-----------QNNTGNAELPK------- t5 -VIV--MYGSI-LRNDVMQYVFYYRENRSVR-----------QNNTGNAELPK------- t38 -VIV--MYVSI-LRNDVMQYVFYYRENRIVR-----------QNNTGVAELPK------- t33 -VIV--MYVSI-LRNDVMQYVFYYRENRIVR-----------QNNTGVAELPK------- t100 -VIV--MYVSI-LRNDVMQYVFYYRENRIVR-----------QNNTGVAELPK------- t34 -VIV--MYVSI-LRNDVMRYVFYYRENRIAR-----------QNNTGVAELPK------- t84 -VIV--MYVSI-LRNDVMRYVFYYRENRIAR-----------QNNTGVAELPK------- t92 -VIV--MYVSI-LRNDVMRYVFYYRENRIAR-----------QNNTGVAELPK------- t3 -VIC--MYVSI-LCNVVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t22 -VIC--MYVSI-LRNVVMQYVFYYKENRSVR-----------QNNTGAAELPK------- t64 -VIC--MYVSI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t18 -VIC--MYVSI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t68 -VIC--MYVSI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t28 -VIC--MYVSI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t82 -VIC--MYISI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t41 -VIC--MYISI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t71 -VIV--MWISI-LRNDVMQYVVYYKENRSVR-----------QNNTGKAELPR------- t94 -VIV--MWISI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPR------- t99 -VIC--MYVSI-LRNDVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t40 -VIC--MYISI-LRNNVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t90 -VIC--MYISI-LRNNVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t4 -VIC--MYVSI-LRNVVMQYVFYYKENRSVR-----------QNNTGKAELPK------- t36 IKIY--MWGTINFKNRVMQYCNYYKENRSVR-----------INNTGLAELPK------- t87 IKIY--MWGTICFKNRVMQYCNYYKENRSVR-----------INNTGLAELPK------- t89 
IEIY--MWGTINFKNQVMQYCNYYKENRSVR-----------INNTGGAELPK------- t39 IEIY--MWGTINFKNQVMQYCNYYKENRSVR-----------INNTGGAELPK------- t8 IEIY--MWGTINFKNQVMQYCNYYKENRSVR-----------INNTGGAELPK------- t6 IEIY--MWGTINFKNQVMQYCNYYKENRSVR-----------INNTGGAELPK------- t58 IKIH--MWGTINFKNKVMQYCTYYKENRSVR-----------INNTGGAELPK------- t54 IKIH--MWGTINFKNKVMQYCTYYKENRSVR-----------INNTGGAELPK------- t65 IHIV--MWADIMFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t29 IHIV--MWADIMFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t98 IHIV--MWADIVFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t83 IHIV--MWADIMFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t95 IHII--MWEEIMFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t20 IHII--MWEDIMFKNCVMQYCTYYKENRSVR-----------ANNTGGAELPK------- t69 IHIV--MWADIMFKNCVMQYCNYYKENRSVR-----------VNNTGGAELPK------- t2 IHIV--MWADIMFKNCVMQYCNYYKENRSVR-----------VNNTGGAELPK------- t10 IHIV--MWADIMFKNCVMQYCNYYKENRSVR-----------VNNTGGAELPK------- t31 YHIV--MWADIMFKNCVMQYSTYYKENRSVR-----------ANNTGGAELPK------- t15 YHIV--MWADIMFKNCVMQYSTYYKENRSVR-----------GNNTGGAELPK------- t63 YHIV--MWADIMFKNCVMQYSTYYKENRSVR-----------VNNTGGAELPK------- t50 YHIV--MWADIMFKNCVMQYSTYYKENRSVR-----------VNNTGGAELPK------- t25 YHIV--MWADIMFKNCVMQYSTYYKENRSVR-----------VNNTGGAELPK------- t51 YHIV--MWADIMFKNCVMQYATYYKENRSVR-----------VNNTGGAELPK------- t9 IGIC--MWETIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAALPK------- t47 IGIC--MWDTIEFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t60 IGIC--MWDTIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t30 IGIC--MWDTIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t70 IGIC--MWDTIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t91 IGIC--MWDTIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t49 IGIC--MWDTIHFKNPVMQYCCYYKEPRSVR-----------VNNTGGAELPK------- t52 IGIG--MWDSIHFKNPVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t43 ICIC--TWGTIHFKNFVMQYCVYYKENRSVR-----------VNNTGGAELPQ------- t1 
ICIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t86 ICIC--GVGTIHFKNLVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t72 ICIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t96 ICIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t44 ICIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------VNNTGGAELPK------- t45 ICIC--TWGTIHFKNSVVQYCCYYKENRSVR-----------VNNTGGAELPK------- t74 ICIC--TWGTIHFKNSVVQYCCYYKENRSVR-----------VNNTGGAELPK------- t26 IKIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------LNNTGGAELPK------- t61 IKIC--TWGTIHFKNSVMQYCCYYKENRSVR-----------FNNTGGAELPK------- t97 IKIC--TWPTIHFKNGMMQYCCYYKENRSVR-----------FNNTGGAELPK------- t17 IKIC--TWGTIHFKNFVMQYCVYYKENRSVR-----------FNNTGGAELPK------- t11 IKIC--TWGTIHFKNFVMQYCVYYKENRSVR-----------FNNTGGAELPK------- t23 IAIV--MWGTINLKNHVMQYCCYYKETRSVR-----------VNNTGGAELPK------- t76 IAEP--LYLVI-YVNAVEPYTDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPVGATHLTH t53 IAEP--LYLVI-YVNAVEPYTDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTH t16 IAEP--LYLVI-YVNAVEPYTDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTH t42 ITEP--LYLII-YVNAVEPYTDAYKKPKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVH t24 IAEP--LYLII-YVNAVEPYTDAYKKPKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVH t73 --------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE t66 --------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE t32 --------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE t75 --------------DRLVNNFVD-ELYNSV---YTAAPKKK--RHL-GIGDNGGMELVRE t79 --------------ARLVNKVVD-ELYNNV---YHAIPKKE--MNQ-GVGDSGGVEMVQE t27 --------------ARLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE t93 --------------ARLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE t59 --------------HRLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE t7 --------------HRLVNKVVD-ELYNNP---YHAIPKKD--MNQ-GVGDSGGIEMVQE t78 --------------HRLVNKVVD-ELYNNP---YHAIPKKD--MNQ-GVGDSGGIEMVQE t12 --------------HRLVNKVVD-ELYNNV---YHAIPKKD--INV-GVGDSGGMEVVQE t81 --------------HRLVVKVAD-ELYNNI---YHAIPKKD--GVLLGIGDSGGMEMVQE t21 
--------------HRLVVKVAD-ELYNNI---YHAIPKKD--GVLLGIGDSGGMEMAQE t80 --------------HRLVDQVVD-ELYSKF---YHAIPKKP--GNM-GVGDSGGMEMVQE t14 --------------HRLVDQVVD-ELYSKF---YHAIPKKP--GNM-GVGDSGGMEMVQE t85 --------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-GIGDSGGMEMVQE t62 --------------HRLVDRVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME t19 --------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME t77 --------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME t88 --------------HHMQNIFID-ELYPGG---YHAAPKKE--EMV-GPGDNGGVEDIKE t37 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE t35 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE t48 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE t55 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE t46 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE t67 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE t57 --------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE t56 --------------HHVQNVFID-ELYPGH---YHAAPKKN--ENV-GPGDNGGVEGIKE t13 --------------HHMQDVFID-ELYLGP---YHASPKKD--RFC-NPGDNGGMETYQE t5 --------------HHMQDVFID-ELYLGP---YHASPKKD--RFC-NPGDNGGMDTYQE t38 --------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NVGDNGGMETIQE t33 --------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NPGDNGGMETIQE t100 --------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NPGDNGGMETIQE t34 --------------HHMQDIFID-ELYLGP---YHASPKKQ--RFC-NPGENGGMETIRE t84 --------------HHMQDIFID-ELYLGP---YHASPKKE--RFC-NPGENGGMETIRE t92 --------------HHMQDIFID-ELYLGP---YHASPKKE--RFC-NPGENGGMETIRE t3 --------------HHMQDIFID-ELYPGQ---YHASPKKK--KIC-GPGDNGGMETIEE t22 --------------HHMQDIFID-ELYPGL---YHASPKKK--KVC-GPGDNGGMETIQE t64 --------------HHMQDIVID-ELYPGP---YHATPKKH--RFC-GTGDNGGMQAIQE t18 --------------HHMQDIFID-ELYPGP---YHATPKKH--RFC-GTGDNGGMQAIQE t68 --------------HHMQDIFID-ELYPGP---DHATPKKH--RFC-GTGDNGGMQTIQE t28 --------------HHMQDIFID-ELYPGH---YHATPKKN--RYC-GPGDNGGMQVIEE t82 
--------------HHMQDIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQVIEE t41 --------------HHMQDIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQVIEE t71 --------------HPMQEIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQSMQE t94 --------------HPVQDIFID-ELYPGH---YHATPKKV--RFC-GPGDNGGMQAIQE t99 --------------HHMQDIFID-ELYPGR---YHATPKKN--RYC-GPGDNGGMQPIQE t40 --------------HHMQDIFID-ELYPGH---YQATPKKD--KFC-GPGDNGGMQTIQE t90 --------------HHMQDIFID-ELYPGH---YHATPKKD--KFC-GPGDNGGMQTIQE t4 --------------HHMQDIFID-ELYPGH---YHATPKKN--RCC-GPGDNGGMQTVQE t36 --------------YHHQNILID-ELYSNV---YPAAPKKH--QYM-GVGDVGGYEVICE t87 --------------YHHQNILID-ELYSNV---YPAAPKKH--QYM-GVGDVGGYEMICE t89 --------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE t39 --------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE t8 --------------YHHQNILID-ELYNDI---YPAAPKKH--KFL-GVGDVGGYEIICE t6 --------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE t58 --------------YHHQNILID-ELYSNI---YPATPKKH--QYM-GVGDVGGYEVICE t54 --------------YHHQNILID-ELYSNI---YPATPKKH--QYM-GVGDVGGYEVICE t65 --------------YHQQNILTD-ELYSDV---YPAAQKKY--QVM-GVGDIGGYELICE t29 --------------YHQQNILTD-ELYSDL---YPAAPKKY--QEM-GVGDIGGYELIVE t98 --------------YHQQNLLTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELIVE t83 --------------YHQQNLLTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELIVE t95 --------------YHQQNILTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELICE t20 --------------YHQQNILTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELICE t69 --------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE t2 --------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE t10 --------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE t31 --------------YHQQNILTD-ELYSDM---YPAAPKKYVIRVM-GVGDIGGYEMICE t15 --------------YHQQNILTD-ELYSDV---YPTAPKKYVIRVM-GVGDIGGYELICE t63 --------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE t50 --------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE t25 --------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE t51 
--------------YHQQNILTD-ELYSDM---YPAAPKKYVIREM-GVGDIGGYELICE t9 --------------YHHQNTLVD-ELYSDV---YPAAPKKK--VCM-GVGDVGGYEVMGE t47 --------------YHHQNTLVD-ELYSDI---YPAAPKKK--VCM-GVGDVGGYEVMGE t60 --------------YHHQNTLVD-ELYSDI---YPAAPKKK--VCM-GVGEVGGYEVMGE t30 --------------YHDQNTLVD-ELYSDV---YPAAPKKK--VCM-GVGDVGGYEVMGE t70 --------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HYM-GVGDVGGYEVMGE t91 --------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HYM-GVGDVGGYEVMGE t49 --------------YHNFNALVD-ELYSDV---YPAAPKKK--HYM-GVGDVGGYEVMGE t52 --------------YHHQNSLVD-ELYSDV---YPAAPKKK--HYM-GVGDVGGYEVMGE t43 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t1 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t86 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t72 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t96 --------------YHHQNILVD-ELYSDV---YPAAPKKK--HYV-GAGDVGGYEVMSE t44 --------------YHHQNMLVD-ELYSDM---YPAAPKKK--VYV-GAGDVGGYEVMSE t45 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t74 --------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE t26 --------------YHHQNVLVD-ELYGEA---YPAAPKKK--HYM-GAGDVGGYEVMSE t61 --------------YHHQNVLVD-ELYSEL---YPAAPKKA--HYM-GAGDIGGYEVMSE t97 --------------YHHQNVLVD-ELYSEV---YPAAPKKA--FYM-GAGDIGGYEVMSE t17 --------------YHHQNALVD-ELYSDV---YPAAPKKK--HYM-GEGDVGGYEVMSE t11 --------------YHHQNVLVD-ELYSDG---YPAAPKKK--HYM-GEGDVGGYEVMSE t23 --------------YHYQNILVD-ELYSND---YPAAPKKA--QFM-GVGDVGGYEIVCE t76 VCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AHV-PMGDEGRISRAKD t53 VCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AHV-PMGDQGRISRAKD t16 VCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDYNASEQKE--AHV-PMGDQGRISRAKD t42 VCVVVPHPWTKGKSEILQNMESA-HLYNAVIRDNNASEQKE--AHV-PVGDEGRISRAKN t24 VCVVVPHPWTKGKSEILQNMESARHLYNAVIRDNNASHQKE--AHV-PAGDQGRISRAKN t73 DIFQILVCV t66 DIFQILVCV t32 DIFQILVCV t75 DIFQILVCV t79 DVFQILVCI t27 DVFQILVCI t93 DVFQILVCI t59 DVFQILVCI t7 DVFQILVCI t78 DVFQILVCI t12 DVFQILVCI t81 DLFQILVCA t21 DLFQILVCV t80 
DVFQILVCL t14 DVFQILVCL t85 DLFQILVCI t62 DVFQILVCV t19 DVFQILVCV t77 DVFQILVCA t88 DMFQVLVCR t37 DMFQVLVCK t35 DMFQVLVCK t48 DMFQVLVCK t55 DMFQVLVCK t46 DMFQVLVCK t67 DMFQVLVCK t57 DMFQVLVCK t56 DMFQVLVCQ t13 DMFQVLVCV t5 DMFQVLVCV t38 DMFQVLVCI t33 DMFQVLVCV t100 DMFQVLVCV t34 DMFQVLVCV t84 DMFQVLVCV t92 DMFQVLVCV t3 DMFQVLVCV t22 DMFQVLVCV t64 DMFQVLVCV t18 DMFQVLVCV t68 DMFQVLVCV t28 DMFQVLVYV t82 DMFQVLVYV t41 DMFQVLVYV t71 DVFQVLVCV t94 DVFQVLVCV t99 DMFQVLVCV t40 DMFQVLVCV t90 DMFQVLVCV t4 DMFQVLVCV t36 NLFQILVVE t87 NLFQILVVE t89 NLFQILVVE t39 NLFQILVVE t8 NLFQILVVE t6 NLFQILVVE t58 NLFQILVVE t54 NLFQILVVE t65 DLFQILVCQ t29 DLFQILVCE t98 DLFQILVCE t83 DLFQILVCE t95 DLFQILVCE t20 DLFQILVCE t69 DLFQILVCE t2 DLFQILVCE t10 DLFQILVCE t31 DLFQILVCE t15 DLFQILVCQ t63 DLFQILVCE t50 DLFQILVCE t25 DLFQILVCE t51 DLFQILVCE t9 DLFQILVCE t47 DLFQILVCE t60 DLFQILVCE t30 DLFQILVCE t70 DLFQILVCE t91 DLFQILVCE t49 DLFQILVCE t52 DLFQILVCE t43 DLFQILVHE t1 DLFQILVHE t86 DLFQILVHE t72 DLFQILVHE t96 DLFQILVHG t44 DLFQILVHA t45 DLFQILVHE t74 DLFQILVHE t26 DLFQILVHE t61 DLFQILVHE t97 DLFQILVHK t17 DLFQILVHA t11 DLFQILVHA t23 DIFQILVCD t76 EIMYIRDLE t53 EIMYIRDLE t16 EIMYIRDLE t42 EILYIRDLE t24 EILYIRDLE bpp-seq-2.1.0/test/example.ph000644 000000 000000 00000503237 12147656566 016036 0ustar00rootroot000000 000000 100 1209 t73 LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPYSGNG t66 LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPCSGNG t32 LIVSQIRVMV RDG--IHKAM DEE-V----- -------LIA RRVKPYSGNG t75 LIVSEIRVMV RDE--VHKAM DEE-C----- -------LIA RRVKPYSGNG t79 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ t27 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTMSGSQ t93 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTMSGSQ t59 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ t7 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKKFSGSQ t78 LIVSEVRVMV RDG--LNIAI DEL-C----- -------LIA NRVKTFSGSQ t12 LIVSEVRVMV RDG--INIAI DEL-C----- -------LIA NRVKAFSGHQ t81 LIVSEVRHMV 
RDG--ANVAI DEL-C----- -------LIA CRVKAFSGHG t21 LIVSEVRHMV RDG--ANIAI DEL-C----- -------LIA CRVKAFSGHG t80 LIVSEVRHMV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG t14 LIVSEVRHMV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG t85 LIVSEVRHVV RDG--VNIAV DEI-C----- -------LIA NRVKSMSGQG t62 LIVSEVRVMV RDG--IHIAV DEI-C----- -------LIA NRVKCMSGQG t19 LIVSEVRHMV RDG--INIAV DEI-C----- -------LIA NRVKCVSGQG t77 LIVSEVRHMV RDG--INIAV DEI-C----- -------LIA NRVKSMSGQG t88 LIVSECRLII RDG--NHDAI DEM-CCVAND LNNEIERLVA SMVKSFRGHD t37 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t35 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t48 LIVSDMRLII REG--SDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t55 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t46 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t67 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t57 LIVSDMRLII REG--NDDAI DEM-CCVANE LNNVIERLVA SMVKEFRGHD t56 LIVSEWRLFI RDG--HDDAI DEM-CCEANE LNNDIEKLVA SMVKGFRGHD t13 GIVSECRMII RDE--HDDAI DEM-C----- -------LVA SMVKKLSGCE t5 GIVSECRMII RDE--HDDAI DEM-C----- -------LVA SMVKKLSGCE t38 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t33 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t100 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t34 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t84 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCD t92 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t3 GIVSECRMII RDE--SDDAI DEM-C----- -------LVA SMVKKLSGCE t22 GIVSECRVII RDQ--SDDAI DER-Y----- -------LVA SVVKRLSGCE t64 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE t18 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE t68 GIVSECRMII RDE--TDDAI DEV-C----- -------LVA SMVKKLSGCE t28 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE t82 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE t41 GIVSECRMII RDE--ADDAI DEM-C----- -------LVA SMVKKLSGCE t71 GIVSECRMII RDQ--SDDAI DEMVV----- -------LVA SMVKEMSGCE t94 
GIVSECRMII RDQ--SDDAI DEMVC----- -------LVA SMVKELSGCE t99 GIVSEVRMII RDE--SDDAI DEM-C----- -------LVA SMVKALSGCE t40 GIVSECRMII RDE--SDDAI DEV-C----- -------LVA SMVKALSGCE t90 GIVSECRMII RDE--SDDAI DEV-C----- -------LVA SMVKALSGCE t4 GIVSECRMII RDE--PDDAI DEV-C----- -------LVA SMVKELSGCE t36 MIWSEVRKMV RER--REQAI DNK-C----- -------LIA ARVKMMSGYV t87 MIWSEVRKMV RER--REQAI DNK-C----- -------LIA ARVKMMSGYM t89 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI t39 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI t8 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI t6 MIWSEHRKMV REG--REQAI DNK-C----- -------LIA KRVKVMSGYI t58 MIWSEYRHMV REG--RDQAI DNK-C----- -------LIA ARVKVMSGYV t54 MIWSEYRHMV REG--RDQAI DNK-C----- -------LIA ARVKVMSGYV t65 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA DRVKMMSGVV t29 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV t98 MIWSEHRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV t83 MIWSEYRQMV REG--NDQSI DNK-C----- -------LIA ARVKMMSGVV t95 MIWSEYRQMV REG--TDQSI DNK-C----- -------LIA ARVKMMSGVV t20 MIWSEYRQMV REG--TDQSI DNK-C----- -------LIA ARVKMMSGVV t69 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV t2 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV t10 MIWSDYRQMV REG--GDDSI DNK-C----- -------LIA ARVKMMSGVV t31 MIWSEYRSMV REG--ADQSI DNK-C----- -------LIA ARVKMMSGVV t15 MIWSEYRSMV REG--SDQSI DNH-C----- -------LIA ARVKMMSGVV t63 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKLMSGVV t50 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKLMSGVV t25 MIWSEYRSMV REG--SDQSI DNK-C----- -------LTA ARVKMMSGVV t51 MIWSEYRSMV REG--SDQSI DNK-C----- -------LIA ARVKMMSGVV t9 MIWSECRQMV REK--TDNAI DNQ-C----- -------LIA ARVKENSGHV t47 MIWSECRQMV REK--SDNAI DNQ-C----- -------QIA ARVKRNSGHV t60 MIWSECRQMV REK--TDNAI DNQ-C----- -------QIA ARVKENSGHV t30 MIWSECRQMV REK--TDNAI DNQ-C----- -------QIA ARVKENSGHV t70 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKEDSGHV t91 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKEDSGHV 
t49 MIWSECRQMV REK--NDNAI DNQ-C----- -------LIA ARVKENSGHV t52 MIWSEERQMV REK--VDNAV DNQ-C----- -------LIA ARVKENCGHV t43 MIWSEVRQMV REG--SDNAI DNR-C----- -------LIA ARVKHVQGHV t1 MIWSEVRQMM REG--TDNAI DNQ-C----- -------LIA ARVKHMQGHV t86 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKPVQGHV t72 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHVQGHV t96 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHVQGHV t44 MIWSEVRQMV REG--TDNAI DTR-C----- -------LIA ARVKHMQGHV t45 MIWSEVRQMV REG--TDNAI DNC-C----- -------LIA ARVKHMQGHV t74 MIWSEVRQMV REG--TDNAI DNC-C----- -------LIA ARVKHMQGHV t26 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHV t61 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHV t97 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHMSGHM t17 MIWSEVRQMV REG--ADNAI DNR-C----- -------LIA ARVKHFSGHT t11 MIWSEVRQMV REG--TDNAI DNR-C----- -------LIA ARVKHFSGHT t23 MIWSECQRML REH--EDQAI ANE-C----- -------LIA FKVKLVNGHV t76 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT t53 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT t16 LSSSHEVRVV NHKSKPDEHI ADK-D----- -------MCA DAMCNGSGHT t42 LSSSHEMRVV HHKSKPDEEI ADQ-D----- -------LCA EEMCKGSGHT t24 QSSSHEMRVV HHKSKPDEEI TDQ-D----- -------LCA EEMCKGSGHT TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S TQVRNDVEDA N--CQEFVGI ---RELGKH- YK-CMDGFHT VNNGAGEN-S NQIRNDIEDA N--GQEFVGI ---RELGKH- YD-CMDGYHT VNNGAGEN-S NQVRNAVEDA A--RPDFVGT ---RELGKQ- YE-CMDGVGA VDTGAGDN-S NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S NQVRNAVEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S GQVRNAIEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S GQVRNAIEDA A--RPDFVGI ---RELGKQ- YE-CMDGVGA VDTGAGDN-S NQVRNAMEDA Q--RPDFVGI ---RELGKQ- YQ-CMDGHGA VDTGAGRN-S HQVRNAVEDA A--RPDFIGI ---RELGKP- Y--CMDGHGA VNTGAGHN-S NQVRNAVEDA P--RPDFIGV ---RELGKP- Y--CMDGHGA VNTGAGVN-S NQVRNAMEMA A--RQNFVGM ---RELEKV- YE-CMDGQGA 
VNTEAGNN-S NQVRNAMEMA A--RQNFVGM ---RELEKV- YQ-CMDGQGA VNTEAGNN-S NQVRNAMEMA A--RQNFVGM ---RELDKA- YQ-CMDGQKA VNTEAGNN-S NQARNAMEMA A--RQNFVGM ---RELGKQ- YQ-CMDGQGA VNTEAGNN-S NQARNAMEMA A--RQNFVGM ---RELGKQ- YQ-CMDGQGA VNTEAGNN-S NQARNAMEMA A--RQNFVGM ---RELGKQ- YH-CMDGQGT VNTEAGNN-S SQARNNSECM R--SAPFIGV ---RELFKR- YHKCVEGAGC VHTVAGTP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S CQARNNSECI T--AAAFIGT ---RELFKR- YPKCMEGARC VHTVAGPP-S SQARNNAECI I--AAPFIGV ---RELFKR- YIKCVEGAGC VYTVA-AP-S NQARNNHECA I--PPPFHGV ---REMFKRV YE-CMEGIGC VNTVAGNP-S NQARNNHECA I--PPPFHGV ---REMFKRV YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S DQARNNHECA I--SHPFHGV ---REMFKR- YE-CMEGIGC VNTVAGNP-S QQARNNRECA T--SIPFLGV ---RELWKR- YE-CMEGIGC VNTVAGKP-S NQARNNRECA I--SIPFLGV ---RELWKR- YE-CMEGIGC VNTVAGTP-S NQTRNNRECA IT-SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S NQTRNNRECA IT-SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S NQTRNNRECA I--SPPFIGV ---RELFKR- YG-CVEGIGC VNTVAGMP-S NQARNNRECA I--SPPFIGV ---RELFKR- YH-CMEGIGC VNTVAGMP-S NQARNNRECA I--SPPFIGL ---RELFKR- YV-CMEGIGC VNTVAGMP-S NQARNNRECA I--SPPFIGL ---RELFKR- YV-CMEGIGC VNTVAGMP-S NQARNNSECA I--SPQFIGV ---RELFKR- YR-CMEGIGC VNTVAGAP-S NQARNNRECA I--SPPFTGV ---RELFKR- YR-CMEGIGC VNTVAGIP-S NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGIP-S NQARNNRECA I--SPPFIGV ---RELFKR- YG-CMEGIGC VNTVAGMP-S 
LQVRNALEDA L--YVFFTGT KGTRELVKY- YM-CMEGHGC VNTMAKKS-S LQVRNALEDA L--YVFFTGN KGTRELVKY- YM-CMEGHGC VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGV VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGC VNTMAKKS-S LQVRNAPEDA L--YNFFTGH KGTRELVKY- YM-CMEGEGC VHTMAKKS-S RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YNFFTGQ KGTKELVKF- YF-CMEGCGC VNTMAKKS-S RQVRNAPEDA L--YNFFTGQ KGTKELIKF- YF-CMEGCGT VNTMAKDS-S RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YNFFTGQ KGTRELIKF- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YNFFTGH KGTRELVKY- YH-CMEGCGC VNTMAKSS-S RQVRNAPEDA L--YNFFTGQ KGTRELVKY- YH-CMEGCGC VNTMAKSS-S RQVRNAPEDA L--YNFFTGQ KGTRELVKY- YH-CMEGCGC VNTMAKSS-S RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKQS-S RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YTFFTGQ KGKRELIKY- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S RQVRNAPEDA L--YTFFTGQ KGTRELIKY- YF-CMEGCGC VNTMAKNS-S KQVRNDSEDI L--YNHFTGK KGARELIKR- YV-CMEGHDC VNTVANVS-S KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S KQVRNDSEDI L-CYHHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S KQVRNASEDI M--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S KQVRNASEDI M--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMANVS-S KQVRNASEDI M--YNHFTGN KGACELIKG- YV-CMEGHDC VNTMANIS-S KQVRNASEDI L--YNHFTGK KGARELIKR- YI-CMEGHDC VNTMAHDS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKVS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA 
L--YQNFSGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YK-CMEGHDC VNTMAKSS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHDC VNTMAKPS-S KQVRNANEDA L--YQNFTGK KGAREKIKR- YL-CMEGHEC VVTMAKNS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHEC VNTMAKNS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGHEC VNTMAKNS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGQDC VNTMAKSS-S KQVRNANEDA L--YQNFTGQ KGAREKIKR- YL-CMEGQDC VNTMAKSS-S KQVRNASEDA H--YTFFAGE KGARELIKR- YS-CMEGDGC VNTMAEGR-S GRIHNAVEGG I--VTNFHGF ---AEMPKN- HD-VMEEQGG PGTCLAPQQS GRIHNAVEGG I--VTNFHGF ---AEMPKN- YD-VMEEQGG PGTCLAPQQS GRIHNAVEGG I--VTNFHGF ---AEMPKN- YD-VMEEQGG PGTCLAPQQS GRIHSAVEGG I--WTNFHGF ---AEMPKN- YD-VMEEQGG PGTCIAPRQS GRIHSAVEGG I--WTNFHGF ---AEMPKN- YD-VMEEQGG PGTCVAPRQS ESAMWIFDLW MCKLNHGMQR --------GD IGRVKRDVQK FPKLKEGAPN ESAMWIFDLW MCKLNHGMQR --------GD IGRVKRDVQK FPKLKEGAPN ESAMWIFDLW MCKLNHGMQR --------GD IGRVKQDVQK FPKLKEGAPN ESAMWIFDLW MCKLNHGMQR --------GD IGRVQQDVHK FPKLKEGAPN ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVVIFDVW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVVIFDIW MCQLPHGMQW --------GD IGRVVVDHPK FLKLEEGAPN ESAVWIFDIW MCKLAHGMQW --------GD IGRVSVDHPK FLKLEEGAPR ESAVWIFDIW MYKLSHGMQW --------GD IGRVGVDHPK FLKLEEGAPN ESAVWIFDIW MYKLSHGMQW --------GD IGRVDVDHPK FLKLEEGAPN ESAVWIFDIW MCKLTHGMQD FGD-----GD IGRVVVDHPK FEKLEEGAPN ESAVWIFDIW MCKLTHGMQD FGD-----GD IGRVVCDHPK FEKLEEGAPN ESAVWIFDIC MCKLTHGMQD --------GD IGRVVCDHPK FEKLEEGAPN ESAVWIFDIW MCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN ESAVWIFDIW MCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN ESAVWIFDIW LCRLTHGMQD --------GD IGRVECDHPK FAKLEEGAPN DSPVWMFDQF MCQLTHSMVD --------GD LGRVVQDNVI FAKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN 
--------GD LGRVVHDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVHDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVMDNWT FTKLKEGAPH ESPVWMFDKF MCQLTHSMVN --------GD LGRVVMDNWT FTKLKEGAPH ESPVWMFDKF MCHLTHSMVV --------GD LGRVLRDNTT FAKLKEGAPV ESSAWMFDKV MYQLTHSMVW --------GD LRRVVHDNVT FSKLKEGAPH ESSAWMFDKV MYQLTHSMVW --------GD LRRVVHDNVT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH ESSAWMFDKF MYQLTHSMVW --------GD LGRVYYDNIT FSKLKEGAPH QSSVWMFDRF MYKLTHSMVW --------GD LGRVYWDNLT FQKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVYVDNQT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LRRVFPDNAH FSKLKEGAPH ESSVWMFDQF MYKLTHSMIW --------GD LGRVFPDNAT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNCT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNCT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LARVFHDNGT FAKLKEGAPH ESSVWMFDRF MYRLTHSMVW --------GD LGRVFQDNAT FSKLKEGAPH ESSIWMFDRF MYKLTHSMVW --------GD LGQVFPDVST FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVYPDNPT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVYPDNPT FSKLKEGAPH ESSVWMFDRF MYKLTHSMVW --------GD LGRVFPDNAT FSKLKEGAPH QSGIWKFNAF MC-LMHLMEG --------GD L--------- ---------- QSGIWKFNAF MC-LMHLMEG --------GD L--------- ---------- QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- QSGIWKFNAF MC-LMRLMQG --------GD L--------- ---------- QSGIWKFNAF MC-LMHLMQG --------GD L--------- ---------- QSGIWKFNAF MC-LMHLMQG --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQS --------GD 
L--------- ---------- QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQS --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQL --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQL --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQE --------GD L--------- ---------- QSAPWKFNRF MC-LMYLMQD --------GD L--------- ---------- QSAPWKFNRF MC-LMYLMQD --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQD --------GD L--------- ---------- QSAPWKFNHF MC-LMHLMQN --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- QSAPWKFNRF MC-LMHLMQN --------GD L--------- ---------- QSAIWKFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWNFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQD --------GD L--------- ---------- QSATWKFDRF MCDLMHLMQG --------GD L--------- ---------- QSAVWKFDRF MCALMHLMQK --------GD L--------- ---------- QSAVWKFDRF MCCLMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCALMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCALMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCALMHLMQV --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- ISAMWKFDRF MCDLMHLMQN --------GD L--------- ---------- VSAVWKFDQF MCDLMHLMQN --------GD L--------- ---------- VSAVWKFDRF MCDLMHLMQN --------GD L--------- ---------- QSAVWKFDRF MCDLVHLMQN ---GARGSGD L--------- ---------- QSAVWKFDRF MCDLVHLMQN ---GARGSGD L--------- ---------- ESAVWKFDPF MCQLTHSMQS --------GD L--------- 
---------- ADTSWPFVMF TVDLAHRCRG --------GD MPQNTGDSMT CPHLAEGLAA ADTSWPFVMF TVDLAHRCRG --------GD IPQNTGDSMT CPHLAEGLAA ADTSWPFVMF TVDLAHRCRG --------GD IPQNTGDSMT CPHLAEGLAA ADTSWPFMVF TVDLAHRCRG --------GD TPTQAGDSMT FPHLAEGLPA ADTNWPFMVF PVELAHRCRG --------GD TP-------- FPHLAEGLPA CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD CSSFVKPYFM GCDMFHKQVE YRGTHGLVLD DTWNEESATF PYPQVHSRSD CSSFVKPVFM GCDMFHRQVQ NRGNHGLVLD DTWNEESGTF PYPQVHSRNE CSSIPMPYFL GVDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSMPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSIPMPYFL GCDMFHREVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSCPMPYFL GCEMFHRQVI LKGAKGLVLP D--NEDSMAF PYVQVHSRSV CSSLPMPYFL GVDMFHRQVT IKGAKGLVLP DIWNESPMAF PYAQVHSKSH CSSLPMPYFL GVDMFHKQVT LKGAKGLVLP DIWNESPMAF PYAQVHSKSH CSSHPVPYFL GIDMFHKQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH CSSHPMPYFL GVDMFHEQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH CSSIPMPYFL GIDMFHKQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH CSSLPMPYFL GIDMFHNQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH CSSLPMPYFL GIDMFHKQVM GKGSKGLILP DTWNEASMAF PYPQVHSKSH CSSLPMPYFL GIDMFHRQVM AKGSKGLILP DTWNEASMAF PYPQVHSKSH CISL--PHFL GIDMFHTQVF VGGH--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ CIAI--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF VGGN--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF LGGD--LILP DPCYELSISV MYAGHASYNQ CIAL--PYFM GIDMFHIQVF IGGD--LILP DPCYELSISV MYAGHASYNQ CISL--PYFL GMDMFHQQVF MGGN--LILP DPCYELSISV MYAGHASYNQ CISH--PYFL GIDMFHIQVY SKGF--LTLP DPRYEISMSV MYSQHHSFSM CISH--PYFL GIDMFHIQVY SKGF--LTLP DPRYEISMSV MYSQHHSFSM CISQ--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ 
CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ CISH--PFFL GIDMFHIQVY FKGS--LTLP DPRYELSMSV MYSQHHSFSQ CIAV--PNYL NIDMFHIQVF YKGP--LTLP DPHYELSMSV MYAQHHSFSQ CISA--PYYL NIDMFHIQVF YKGS--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHDQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHEQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHDQVV YRGP--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHAQVF YRGS--LTLP DPRYELSMSV MYSQHHSFSQ VISH--PYFL GIDMFHGQVV YRGS--LTLP DPRYELSMSV MYSQHHSFSQ VISH--PYFL GIDMFHGQVV YRGS--LTLP DPRYELSMSV MYSQHHSFSQ VISV--PYFL GIDMFHGQVF LRGS--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHGQVF YRGS--LTLP DPRYELSMSV MYAQHHSMSQ VISQ--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ VISH--PYFL GIDMFHDQVF YRGS--LTLP DPRYELSMSV MYAQHHSFSQ VISV--PYFL GIDMFHDQVF YRGS--LTLP DPRYEMSMSV MYAQHHSFSQ -CSY--PSFL GMCMFHAMVQ SKGD--LVLP DSPNEDSVSF MYIQHHSFHE -CSY--PSFL GMCMFHAMVQ SKGD--LVLP DSPNEDSVSF MYIQHHSFHE -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE -CSY--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE -CSH--PSFL GMCMFYAMVQ SKGD--LVLP DSCNEDSVSF MYIQHHSFHE -CSH--PSFL GMCMFHAMVK SKGN--LVLP DSCNEDSVSF MYIQHHSFHE -CSH--PSFL GMCMFHAMVQ SKGN--LVLP DSCNEDSVSF MYIQHHSFHE -CNS--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNC--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PPFL GMCMFHPQVQ MKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNP--PSFL GMCMFHPQVR SKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNP--PSFL GMCMFHPQVR SKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PGFL GMCMFHPQVQ RKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PGFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PGFL GMCMFHPQVQ AKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNL--PSFL 
GMCMFHNQVQ CKGE--LVLP DNVNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFHNQVP NKGA--LVLP DNVNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFHNQVS FKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFHNQVS VKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFVNQVS FKGE--LVLP DNPNENSVEF MYRRHHSLSD -CNS--PSFL GMCMFHVQVQ FKGE--LVLP DHPNENSVEF MYRRHHSLSD -CSY--PSFL PMCMFVPQVL SKGE--LVLP DSSNEESISF VYLQHHSFSV -CSY--PSFL PMCMFVSQVL SKGE--LVLP DSSNEESISF VYLQHHSFSD -CSV--PSFL PMCMFVSQVL HKGE--LVLP DSSNEESISF VYLQHHSFSD -CSV--PSFL PMCMFVSQVL HKGE--LVLP DSSNEESISF VYLQHHSFSE -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSTEESISF MYLQHHSFSD -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSTEESISF MYLQHHSFSD -CSY--PSFL PMCMFVAQVI AKGE--LVLP DSSNEESISF MYLQHHSFSD -CSY--PSFL PVCMFVAQVI NKGE--LVLP DSSNEESISF MYLQHHSFSE -CSH--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSH--PSFL GMCMFVEQVR CKGE--LVLP DSPNEESLSF VYIQHHSFSD -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSH--PSFL GMCMFHEQVR RKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSH--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSV--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSV--PSFL GMCMFHEQVR CKGE--LVLP DSPNEESVSF VYIQHHSFSD -CSF--PSFL GMCMFHDQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD -CSF--PSFL GMCMFHEQVR CKGE--LVLS DSKNEESVSF VYIQHPSFSD -CSF--PSFL GMCMFHQQVR CKGE--LVLP DSKNEESVSF VYIQHHSFND -CSF--PSFL GMCMFHEQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD -CSF--PSFL GMCMFHEQVR CKGE--LVLP DSKNEESVSF VYIQHHSFSD -CSV--PSFL GVCMFQRQVV MKGE--LVLP DSADEGSVSF MYIQHHSFAA VNS---RAFI GVSIYDLKVS HKMI--QACN PIQN------ ---DNHAFSD VNS---RAFI GVSTYDLKVS HKMI--QACN PIQN------ ---DNHAFSD VNS---RAFI GVSTYDLKVS HKMI--QACN PIQN------ ---DNHAFSD VNS---RAFV GVSSYDLRVS HKMI--QACT LIQN------ ---DNVAFSD VNS---RAFI GVSSYDLKVS HKMI--QACN PIQN------ ---DNVAFSD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD KRCLNGLHQG DHEESVH--- HAPRI----M R---LIGH-H LVIIVLMNHD KRCINKLHQG DHNESCH--- 
HNPRI----V R---RIGH-H LVIIVLVNRD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGL-N LVIISYANVD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD KRCLNVLHQG DHEESRH--- HNARI----V R---RIGM-N LVIISYANVD KRCLNVLVQG DHDESRH--- HNARI----V A---RIGM-N MVIIRYLNVE KRCLNVLVQG DHEESEH--- HNARI----M S---KIGT-H LVIINIINVE KRCLNVLVQG DHEESEH--- HNARI----M S---RIGT-H LVIVNVINVE RRVLNVLHQG DVEES-H--- HSARC----V R---RIGF-H LVIIVFWNVE RRVLNVLHQG DVEES-H--- HSARC----V R---RIGF-H LVIIVYWNVE RRVLNVLHQG DHEES-H--- VSARC----M R---RIGF-H LVIINFWNVE RRVLNVLHQG DHEES-H--- HSARC----M R---RIGY-H RVIINYWNVE RRVLNVLHQG DHEES-H--- HSARC----M R---RIGYGH LVIINFWNVE RRVLNVLHQG DHEES-H--- HSARC----M R---RIGY-V LVIINFWNVE KRCINNLDQG DQEDSNHRKE HKIRASVLLY R---QIGI-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPRKE HKIRNSVLLY R---QIGL-L -VIIKEANEL KRCINGLDNG DRQDSDPQKE HKIRNSVLLY R---QIGL-L -VIIKEANEI KRCLNNLDQG DREDSSHRKE HKIRRSVLLY Q---QIGC-L -VITRERNEL KRCLNGLDHG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKDANSM KRCLNGLDHG DREESPHQIE HKMRKSVLIY S---PIGY-L -VIIKDANSM KRCLNGLDVG DREESPHHID HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNGLDVG DREESPHQIE HKMRKSVLIY N---PIGY-L -VIIKNANSL KRCLNALDHG DREESPHQIE HKMRKSVLLY N---PIGY-L -VIIKNANSM KRCLNALDYG DREESPHQIE HKMRKSVLLY Y---TIGW-L -VIIKNADSM KRCLNALDHG DTQESPHGIE HNMRNSVLLY N---PIGF-L -VIIKNINSM KRCLNALDHG DTQESPHGIE HNMRNSVLLY 
N---PIGF-L -VIIKNINSM KRCLNALDHG DTQESPHGIE HNMRNSVLLY N---PIGF-L -VIIKNINSM KRCLNPLDHG DRQESSHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM KRCLNPLDHG DRQESPHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM KRCLNPLDHG DRQESPHGIE HNMRSSVLLY N---PIGF-L -VIIDNINPM KRCLNHLDHG DRQESPHGIE HNMRKSVLLY N---PQGY-L -VIIKNVNSM KRCLNTLDHG DRQESPHGIE HNMRKSVLLY D---PIGY-L -VIIKNVNSM KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---PIGY-L -VIIKNINSM KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---SIGY-I -VIIKNLNSM KRCLNALDHG DRQESPHGIE HNMRKSVLLD S---PIGY-I -VIIKNVNSM KRCLNALDYG DRQESPHGIE HNMRKNVLLD N---PIGH-L -VIIKNENSM KRCLNPLNVG DREDSEHIKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNPLNVG DREDSDHVKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNPLNVG DREDSDHTKE KHMRI----Y A---NIGG-H -IIIREWNDV KRCLNHLNVG DREDSEHIKE KHMRV----Y A---SIGG-H -IIIREWNDV KRCLNHLNVG DREDSEHIKE KHMRI----Y A---SIGG-H -IIIREWNEV KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV KRCLNLLFVG DSQDSQHQRE KQLRI----Y G---RIGK-V -VIIKVWNAA KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGK-V -VIIKVWNAV KRCLNLLYTG DSQDSQHLRE KQLRI----Y G---RIGR-V -VILQVWNAV KRCLNLLYVG DSQDSQHLRE KQLRI----Y G---RIGR-V -VILQVWNAV KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV KRCLNLLYVG DNQDSQHLRE KQLRM----Y G---EIGK-V -VIIKVVNAV KRCLNLLYVG DNQDSQHLRE KQLRI----Y Q---NIGK-V -VIIKVWNAN KRCLNLLYVG DNQDSQHLRE KQLRI----Y P---SIGK-V -VIIKVWNAA KRCLNLLYVG DEQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV KRCLNLLYVG DNQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV KRCLNLLYVG DNQDSSHLRE KQLRL----Y A---RIGK-V -VIIKVWNAV KRCLNLLYVG DNQDSQHLRE KQLRI----Y A---RIGK-V -VIIKVWNAV KRCLNYLNVG DSADSKHLKE KHLRA----E GMPGNIGK-H -VIINVWNAV KRCLNYLNVG DNSDSKHLKE KHLRA----Q GMPGNIGK-H -VIINVWNAV KRCLNYLNVG DNADSKHLKE KHLRA----Q GMPGNIGK-H 
-VIINVWNAV KRCLNYLNVG DNADSRHLKE KHLRA----Q GMPGNIGK-H -VIISVWNAV KRCLNYLNVG DNEDSKHAKE KFLRG----D G---GIGQ-H -VIINVWNAV KRCLNYLNVG DNEDSKHAKE KFLRG----D G---GIGQ-H -VIINVWNAV KRCLNCLNVG DNEDSKHLKE KFLRA----D G---GIGQ-H -VIINMWNAV KRCLNYLNVG DNEDSKHLKE KFLRA----D G---NIGQ-H -VIINMWNAV KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGH-H -VIINVWNAF KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGR-H -VIINVWNAF KRCLNCLNVG DNDDSEHLKE KHLRI----Y G---QIGR-H -VIINVWNAF KRCLNCLNAG DNDDSEHLKE KHLRT----Y G---QIGR-H -VIINVWNAF KRCLNCLNVG DNDDSEHLKE KHLRV----Y G---QIGR-H -VIINVWNAF KRCLNYLNVG DNDDSEHLKE KHLRI----Y G---VIGK-H -VIINVWNAF KRCLNYLNVG DNDDSEHLKE KHLRI----Y G---EIGK-H -VIINVWNAF KRCLNYLNVG DNDDSEHIKE KHLRI----Y G---EIGK-H -VIINVWNAF KRCLNYLNVG DNEDSEHLKE KHLRV----Y G---NIGK-H -VIINSWNAM KRCLNYLNVG DNEDSEHLKE KVLRV----Y G---DIGK-H -VIINVWNAM KRCLNYLNVG DNEDSEHLEE KHLRV----Y G---NIGK-H -VIINVWNAM KRCLNELNVG DNEDSIHLKE KHLRI----Y G---NIGK-V -VIINVWNAM KRCLNSLNVG DNEDSIHLKE KHLRI----Y G---DIGK-H -VIINVWNAM KRCLNVLNVG DNEDSVHLRH KQLRV----Y G---KIGK-H -VIISVPNAI QRCLNDLSQG VVGASFPKMQ HGVRH----Y K---K----- ---RVYENEV QRCLNDLAQG VVGASHPKMQ HGVRH----Y K---K----- ---RVYENEV QRCLNDLAQG VVGASHPKMQ HGVRH----Y K---K----- ---RVYENEV QRPLNDLNQG CVGTSHPKMP HGVRH----Y K---K----- ---KVYENEV QRVLNDLSQG CVGTSHPKMP HGVRH----Y K---K----- ---RVYENEV MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG MGRLNHLEPD AVIPSPLRIG GDGWVPKPI- ---------- -------EDG MGRLNYIEPD ALVPSPLRFG GDEWVPKPI- ---------- -------EDG MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------GDG MGRLNHCEDQ AIVYSPLRYG GAGFVSKPI- ---------- -------ADG MGRLNHCEDQ AIVYSPLRYG GAGFVSKPI- ---------- -------ADG MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG MGRLNHCEDE AIVFSPLRYG GAGFVSKPI- ---------- -------ADG MGRLNHCEDD AMVYSPLRIG GAGTHSKPI- ---------- -------ADG MQRLNHCEDK AIVYSPDRIG GAGFHSKPI- ---------- -------ADG 
MQRLNHCEDK AIVYSPDRIG GAGFHSKPI- ---------- -------ADG MGRLNHCEDE AIIYSPLRIG GAGFHSKPV- ---------- -------DDG MGRLNHCEDE AIIYSPLRIG GAGFHSKPV- ---------- -------DDG MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------NDG MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG MGRLNHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG MGRLIHCEDE AIIYSPLRIG GAGFHSKPI- ---------- -------ADG MNRLNHKEPE NGIIFPLR-- -DAQDPKQI- ---------- -------LNG MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKEPK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKERK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKERK NGVIFPLR-- -GAQGPKQI- ---------- -------SNG MGRLNHKEPR DGVIFPHR-- -GAQGPKQL- ---------- -------ANG MGRLNHHESR AVVAFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR AIVAFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR LIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHYESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEGPKQV- ---------- -------MEG MGRLNHHESC AIVMFPLR-- -GAEGPKQI- ---------- -------IEG MGRLNHHESC AMVMFPLR-- -GAEGPKQI- ---------- -------MEG MGRLNHHESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG MGRLNHHESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG MGRLNHYESR AVVGFPLR-- -GAENPKQI- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------VEG MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------MEG MGRLNHHESR AIVTFPLR-- -GAEHPKQI- ---------- -------MEG MGRLNHHESR AVVTFPLR-- -GADNPRQI- ---------- -------MEG MGRLNHHESR AKVTFPLR-- -GADNPRQI- ---------- -------MEG MGRLNHHESR AVFAFPLR-- -GAENPKQI- ---------- -------MEG MGRLNHHESR AVVTFPLR-- -GAEDPKQI- ---------- -------MEG MGRLNHHESR 
AVVTFPLR-- -GAEDPKQI- ---------- -------MEG MGRLNHHESR AVVTFPLR-- -GAEDPKQK- ---------- -------MEG MGRLNHIEPG AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNHIEPG AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNVIEPE AEVIFPLRK- -RGQHSKPV- ---------- -------IDG MGRLNHIEPG AEVTFPLRR- -RGQASKPV- ---------- -------IDG MGRLNHIEVG AEVTFPLRR- -RGQASKPV- ---------- -------IDG MGRLNHHEPP AEVMFPLRK- -GGQDSKPF- ---------- -------IDG MGRLNHVEPP AEVMFPLRR- -GGSDSKPF- ---------- -------LDG MGRLNHHEPP AEVMFPLRR- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVMFPLRR- -GGSCSKPF- ---------- -------VDG MGRLNHHEPP AEVMFPLRK- -GGSNSKPV- ---------- -------YDG MGRLNHHEPP AEVMFPLRK- -GGSNSKAV- ---------- -------YDG MGRLNHVEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPS AEVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AAVMFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVMFPLRK- -CGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVVFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVVFPLRK- -GGSDSKPF- ---------- -------IDG MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG MGRLNHHESP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------IDG MGRLNHHEPP AEVVFPLRQ- -GGPDSKPM- ---------- -------MDG MGRLNHHEPP PEVVFPLRQ- -GGPDSKPM- ---------- -------VDG MGRLNHHEPP PEVVFPLRQ- -GGPDSKPM- ---------- -------VDG MGRLNHHEPP PEVVYPLRQ- -GGPNSKPM- ---------- -------VDG MGRLNHHEPP AEVNFPLRQ- -GGPDSKPM- ---------- -------VDG MGRLNHHEPP AEVVYPLRK- -GGPDSK-M- ---------- -------VDG MGRLNHHEPP AEVVYPLRK- -GGPDSKPV- ---------- -------VDG MGRLNHHEPP AEAVYPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPP AEVVYPLRK- 
-GGPDSKPM- ---------- -------VDG MGRLNHHEPP AEVVYPLRK- -GGPDSKLM- ---------- -------VDG MGRLNHHEPP ADVEYPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPP AEVMYPLRK- -GGPESKPM- ---------- -------VDG MGRLNHHEPP AEVMYPLRK- -GGPESKPM- ---------- -------VDG MGRLNHHEPA AEVVYPLRK- -GGPESKPM- ---------- -------VDG MGRLNHHEPA AEVVCPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPA AEVVCPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPA AEVVYPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPA AEVVYPLRK- -GGPDSKPM- ---------- -------VDG MGRLNHHEPS AIVVFPLRQ- -RGADSKSV- ---------- -------GDG GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV CSLLVLQPAR GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV CSLLVLQPAR GGRLDDAEAI DDMGVWIRW- -EGVKHAWCH PIGSCPDHLV CSLLVLQPAR GDRLDDAEGI DDVGVWMRW- -NGVKHAWCH IIGSCPDHLV CSLLVLQPGR GDRLDDAEAV DDVGVWMRW- -AGVKVAVCH VIGSCPDHLV VSLLVLQPAR FFDK-D-Q-- ----SRPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG FFDK-D-Q-- ----SRPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG FFDK-D-Q-- ----ARPDVA NASIVPDK-- --DQWVGAHN QWGSSLRKVG FFDK-D-Q-- ----ARPDVA NA-IVPDR-- --DQWVGAHK EWGSSLCKVG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLMA NAVCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLMA NAHCVPEN-- --DQWTGAVG QWCSSVVKCG FFDK-D-Q-- ----ARPLIA NAHIVPEN-- --DQWTGAVA QWCSSVIKCG FFDK-D-Q-- ----ARPIIA NAHIVPVN-- --DQWTGPMA QWCSSVIKCG FFDK-D-Q-- ----ARPIIA NAHIVPIY-- --DQWTGPMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA SAHIVPEH-- --DQWSGAMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA SAHIVPEH-- --DQWSGAMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA NAHIVPEH-- --DQWSGAMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMA QWCSSVIKCG FFDK-D-Q-- ----ARPVIA NAHIVPEY-- --DQWSGAMP QWVSSVIKCG LFDK-E-E-- ----NRPMVQ DADSVVGS-- --AQWAGQHR SWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- 
--KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPGR-- --KQWAGQHR AWCSSDDKA- LFEK-E-E-- ----NRPMVQ DAGSVPER-- --AQWPGQQR AWCSSDDKA- LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFLK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- LFDK-E-E-- ----ARPMVQ DAASVPGR-- --AQWVGRVR AWCSSDVKA- HFDK-E-E-- ----ARPIVQ DAASVPGK-- --AQWVGRVR AWCSSDVKA- HFDK-E-E-- ----ARPIVQ DAASVPGK-- --AQWVGRVR AWCSSDVKA- HFDK-E-E-- ----ARPIVQ DAASVPEK-- --AQWVGRVR AWCSSDVKA- HFDK-E-E-- ----ARPIVK DAASVPGK-- --AQWVGRIR AWCSSDVKA- HFDK-E-E-- ----ARPIVQ DAASVPGE-- --AQWVGHIR AWCSSDIKA- HFDK-E-E-- ----ARPIVQ DAASVPGE-- --AQWVGHIR AWCSSDIKA- HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGKVR AWCSSDVKA- HFDK-E-E-- ----ARPVVQ DAPGVPEQ-- --AQWVGKMR AWCSSDVKA- HFDK-EKE-- ----ARPVVQ DAPGVPGK-- --AQWVGRIR AWCSSDVKA- HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGRVR AWCSSEIKT- HFDK-E-E-- ----ARPVVQ DAPGVPGK-- --AQWVGRVR AWCSSEIKT- SFDK-E-E-- ----ARPVVQ DAPGVPGR-- --AQWVGRVR PWCSSDVKA- FFAK-D-E-- ----DRPGIQ NAVSVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----ERPGIQ NAMNVPCG-- --DQWVGSIR GWCSSQHRYG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --DQWVGSVR 
GWCSSQHRYG FFAK-D-E-- ----DRPGIQ NAMSVPCG-- --EQWVGSVR GWCSSQHRYG FF-------- ----DRPGIA NAMSVKCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPAIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGSIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGSIK GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGSIK GWCSSQHQYG FF-------- ----DRPGIA NAMNVPCG-- --DQWVGCIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGCIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGIIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGVIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWMGVIR GWCSSQHQYG FF-------- ----DRPGIA NAMSVPCG-- --DQWVGVIR GWCSSQHQYG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG FFDK-D-D-- ----DRPGIA NAMSVPCG-- --EQWGGPTR GWCSSQQKFG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHKFG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHKFG FFDK-D-D-- ----DRPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHRFG FFDK-D-D-- ----DSPGIA NAMNVPCG-- --DQWGGPIR GWCSSQHKFG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVNFG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----HRPGIA NAVSVPCA-- --DQVGGPLR GWCSSQVKFG FFDK-E-D-- ----ERPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----DRPGIA NAVSVPCG-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----DRPGIA NAVSVPCT-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----DRPGIA NAVSVPCA-- --DQVGGPIR GWCSSQVKFG FFDK-E-D-- ----DRPGIA NAVSVPCD-- --DQWGGPVR GWCSSQHKFG FFDK-E-D-- ----DRPGVA NAVSVPCD-- --DQWGGPVR GWCSSQHKFG FFDK-E-D-- ----DRPGVA NAMSVPCD-- --DQWGGPVR GWCSSQHKFG FFDK-E-D-- ----DRPGIA NAVAVPCS-- --DQWGGPVR GWCSSQHKFG 
FFDK-E-D-- ----DRPGIA NAVAVPCS-- --DQWGGPVR GWCSSQHKFG FYDK-D-E-- ----DRPGIA NAVSVPVA-- --EQWVGSVR GWYSSEHKYG NFIQLS-MVR DTPHRLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV NFIQLS-MVR DTPHGLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV NFIQLS-MVR DTPHGLPKMA SAAPVYKQTC HLEQVQGTAR MWGKSGPRVV NFIQLS-VVH DTPHRLPKMA SAAAVFKQTC HLEQVPGTAR MWGKSGPRVR NFIQLS-VVH DTPHRLPKMA SAAAVFKQTC HLEQVAGTAR VWGKSGPRVK LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG LAVHDLR-NI SDLYRCVWMD CHLGANHYRQ ISRMKMITPT YTHMTEYANG LAVHDLR-NV SDLYRCVWME CVLGANHYQQ VSRMKMVTPA YTHMTEYANG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG LADHPLE-NI SRLLRCVVME CILGANVYHQ ISRMKMHGPT YSHVMEYAHG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG LADHPLE-NI SRLLRCVVME CVLGANVYHQ ISRMKMHGPT YSHVMEYAHG LANHDLE-NI SRLLRCVVME CVLGANVYHQ ASRMKMHGPY YAHVTEYAHG LANHELE-DV SRLLRCVGME CNLGANHYHQ VSRMKMHGPI YSHMTEYAVG LANHELK-VV SRLLRCVGME CNLGANHYHQ VSRMKMHGPI YSHMTEYAHG LANHDLE-NV SRLLRCVMID CNQGANHYIQ ISRMKVHGPT YSHMTEYAHG LANHDLE-NV SRLLRCVMID CNQGANHYIQ ISRMKVHGPT YSHMTEYAHG LANHDLE-NV SRLLRCVEID CNQGANHYVQ ISRMKMYGPT YSHMTEYAHG LANHDLE-NV SRLLRCVVID CNHGANHYVQ ISRMKMHGPT YSHMTEYAHG LANHDLE-NV SRLLRCVVID CNHGANHYVQ ISRMKMHGPT YSHMTEYAHG LANHDLE-NV SRLLRCVVID VNQGANHYVQ ISRMKMHGPT YSHMTEYAHG ---------- SQLPRNTHIV VEIGANVYEQ FSRMKTNIPI YAHVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTVIPI YARVTEYAVG ---------- SQLPRNTRIV AEVGANVYEQ YSRMKTVIPI YARVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG ---------- SQLPRNTRIV AEIGANVYEQ YSRMKTNIPI YARVTEYAVG ---------- SKLPRNTKMV AEIGANVYEQ ASRMKTNIPI YAHVTEYALG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- 
SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SHLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI FAHVTEYAVG ---------- SRLPRNDPVV HEVGANVYQQ ISRMKTSIPI YAHVTEYAVG ---------- SQLPTNDPIV PEVGANVYQQ VSRMKTSIPI YAHVTEYAVG ---------- SKLPRNDPIV PEVGANVYQQ LSRMKTNIPI YAHVTEYAVG ---------- SKLPRNDPIV PEVGANVYQQ LSRMKTNIPI YAHVTEYAVG ---------- SKLPRNDPIV PEVGANVYQQ ISRMKTNIPI YAHVTEYAVG ---------- SQLPRNDPIV PEVGANVYQQ DSRMKTNIPI YAHVTEYACG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI YAHTTEYACG ---------- SQLPRNDPIV PEVGANVYQQ VSRMKTNIPI YAHTTEYACG ---------- SQLPRNDTIV PEVGANVYQQ VSRVKTHIPI YAHVTEYAVG ---------- SQLPRNDTIV PEVGANVYQQ VSRVKTNIPI YAHVTEYAVG ---------- SELPRNDAIV PEVGANVYQQ VSRMKTNIPI YAHATEYAVG ---------- SELPRNDAIV VEVGANVYQQ ISRMKTDIPI YAHATEYAVG ---------- SELPRNDAIV PEVGANVYQQ ISRMKTDIPI YAHATEYAVG ---------- SELPRNDAIW PEVGANVYQQ ISRMKTNIPI YPHVTEYAVG LANHILVHEP SKLHKTYRIN RKVGANAYDQ DSRMKAAAPV YPHIMEYAHG LANHILVHEP SKLHKTYRIN RKVGANAYDQ DSRMKAAAPV YPHIMEYAHG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG LANHILVHEP SKLHKTYKIN RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG LANHILVHEP SKLHKTYKVV RKVGANAYDQ VSRMKAAAPV YPHIMEYAHG LAIHILVHQQ SRLHKTYNID RKNGANAYEQ DSRMKAGAPV YPHIMEYAHG LAIHILVHQQ SRLHKTYNID RKNGANAYDQ DSRMKAGAPV YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG LANHILIHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG LANHILVHNP SRLHRTYAIN KKMGANVYQQ SSRMKAAAPL YPHIMEYAHG LANHILIHNP SRLHRTYAIN KKMGANVYQQ TSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN RKVGANVYQQ TSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN RKVGANVYQQ MSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ ISRMKALAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN 
KKMGANVYQQ ISRMKALAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ ISRMKALAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ TSRMKAAAPI YPHIMEYAHG LANHILVHGP SKLHRTYAIN RKMGANVYQQ TSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG LANHVLVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG LANHILVHGP SKLHRTYAIN KKMGANVYQQ DSRMKAAAPL YPHIMEYAHG LASHTLSHDP SSLHRTFAIN RKMGANVYQQ TSRMKASAPF YPHCMEYAHG LASHTLSHDP SNLHRTFAIN RKMGANVYQQ ASRMKASAPF YHHCMEYAHG LASHTLSHNP SNLHRTFAIN RKMGANVYQQ TSRMKASAPF YHHCMEYAHG LASHTLSHTP SNLHRTFAIN RKMGANVYQQ TSRMKASAPF YHHCMEYAHG LAFHTLSHDP SKLHRTFAIN QKMGANVYQQ MSRMKTPAPF YPHVMEYAHG LAFHTLSHDP SKLHRTFAIN QKMGANVYQQ MSRMKTPAPF YPHVMEYAHG LAFHTLKHDP SKLHRTFAIN EKMGANVYQQ NSRMKASAPF YPHVMEYAHG LASHTLSHDP SKLHRTFGIN KNMGANVYQQ TSRMKANAPF YPHVMEYAHG LANHTLVQ-P SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG LANHTLVQ-P SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG LANHTLVQ-P SKLHRTFKIN KKMGANVYHQ TSRMKAEAPV YPHIMEYAHG LANHTLVQ-P SKLHRTFKIN KKIGANVYQQ TSRMKADAPV YPHIMEYAHG LANHTLVQ-P SKLHRTFKIN KKIGANVYQQ TSRMKAEAPV YPHIMEYAHG LANHTLAQNP SKLHRTFKIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG LANHTLVHNP SKLHRTFAIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG LANHTLVHNP SKLHRTFAIN KKMGANVYQQ TSRMKAEAPV YPHIMEYAHG LADHTLIHIP SKLHRTFAIN KKMGANVYQQ TSRMKAAAPV YSHVMEYAHK LADHTLVHIP SKLHRTFAIT KKMGANVYQQ TSRMKATAPV YPHVMEYAHG LADHTLVHVP SKLHRTFPIN KKMGANVYQQ TSRMKATAPV YPHVMEYAHG LADHTLVHVP SLLHRTFAIN RKMGANVYQQ TSRMKAAAPV YPHMMEYAHG LADHTLVHVP SLLHRTFAIN KKMGANVYQQ TSRMKAAAPV YPHMMEYAHG LANHILI-GP SKLHRTYQTT AKMGANVVKA TNRMKRPQPV YPHVMEYANG LNGHMLK-TQ NTLHRQYLVD VELQATMVFV AARMKTDSSM YMSVREILHG LNGHMLK-TQ NTLHRQYLVD VELGATMVFV AARMKTDLSM YMSVREILHG LNGHMLK-TQ NTLHRQYLVD VELGATMVFV AARMKTDLSM YMSVREILHG LNGHVLK-NQ NILHRQYLVD VNLGATMVFV LARMKTDASM YMSHREILHG LNGHVLK-NQ NVLHRQYLVD VDLGATMVFV AARMKTDASM YMSHREILHG L----WHPFY KASDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ L----WHPFY KASDHKNEAQ GVGEK-PHQL 
PCVESETCQY ETKHAKVCKQ L----WHPFY KASDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ L----WHPFY KNPDHKNEAQ GVGEK-PHQL PCVESETCQY ETKHAKVCKQ L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL L----WKPFY QASDHKNEAQ GVGER-THQL PCVESHTCQY EVKHAKVCKL M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL M----WKPFY QASDHKNEAQ GVGER-THQL PCVDSHTCQY EVKHAQVCKL L----WKPFY QTSDHKNEAQ GVGES-THQL PCVESQTCQY EVKHAKVCKL L----WKPFY QASEHKNEAQ GLGER-PHQL PCVESQTCQY EIKVAKVCKL L----WKPFY QASEHKNEAQ GVGER-PHQL PCVESQKCQY EVKVAKVCKL L----WKPFY QASDHKNDAQ GVGER-PHQL PCVESQTCQY EMKHAKVCKL L----WKPFY QASDHKSDAQ GVGER-PHQL PCVESQTCQY EMKHAKVCKL L----WKPFY QASDHKNDAQ GVGER-PHQL PCVESQTCQY EVKHANVCKL L----WKPFY QASDHKNDAQ GVGER-PHQL PCVEPQTCQY EVKHAKVCKL L----WKPFY QASDHKNDAQ GVGER-THQL PCVEPQTCQY EVKHAKVCKL L----WKPFY QASDHKNDAQ GVGER-VHQL PCVEPQTCQY EVKHAKVCKH V----ERPFY E-SEFKNEAQ GWGES-GTSI PCVDSPDVQY EMKVAWVDKL V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----PRPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----PRPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----ARPFY D-AEFKNEAQ RQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----ARPFY D-AEFKNEAQ GQGES-GTSI PCVDSPDIQY EMKHAWVEKL V----DRPFY D-SEFKNEAQ GQGES-GTSI PCVDSPDVQY ELKHAWVDKL I----VKPFY D-NEWKQEAQ GQGEA-GACI PCVDSKDVQY ELKHAYVKKL I----VKPFY D-NEWKQEAQ GQGEA-GACI PCVDSKDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----VKPFY D-SEWKQEAQ GQGEA-GACI PCVDSPDVQY ELKHAYVKKL I----IKPFY D-NEWKQEAQ GQGEA-GASI PCVNSPDVQY ELKHAHIKKL I----VKPFY D-NEWKQEAQ GQGEA-GASI PCVNSPDVQY 
ELKHAHIKKL I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL I----VKPFY D-NEWKQEAQ GVGEA-GASI PCVDSPDVQY ELKHAEVRKL I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHANVKKL I----VKPFY E-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHANVKKL I----VKPFY N-NEWKQEAQ GQGEA-GASI PCVDSPDDQY ELKHAEVKKL I----VKPFY N-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL I----VKPFY D-NEWKQEAQ GQGET-GASI PCVDSPDVQY EMKHAEVKKL I----VKPVY D-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL I----VKPVY D-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL I----VKPFY L-NEWKQEAQ GQGEA-GASI PCVDSPDVQY ELKHAEVKKL M----FNPFY GLSEPKNNAQ GNGEN-PMNK PCVESEDCQY EKKHASMDKL M----FNPFY GLSEPKNNAQ GNGEN-PMNK PCVESEDCQY EKKHASMDKL M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL M----FNPFY GLSEPKNNAQ GNGEN-PMNV PCVESEDCQY EKKHASMTKL M----FNPFY GMSEPKNNAQ GNGEN-PMNV PCVESDDCQY EKKHASMDKQ M----FVPFY GMSEPKNNAQ GNGEN-PMNV PCVESDDCQY EKKHASMDKQ M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY EHKHAAMEKL M----VKPFY GQSEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECVY ESKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PSVESGECVY ESKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL M----VKPFY GISEPKNEAQ GNGEN-PMNV PCVESGECQY ENKHAAMEKL M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL M----VEPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL M----VKPFY GISEPKNEAQ KNGEN-PMNV PCVESGECQY ESKHAAMEKL V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ENKHATMTKL 
V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESRDCQY ENKHATMTKL V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ESKHAAMTKL V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIDCQY ENKHATMTKL V----CKPFY EHSEPKNEAQ GNGEKVPMNV PCVESMDCQY ENKHAPMAKL V----CKPFY EHSEPKNEAQ GNGEKVPMNV PCVESMDCQY ENKHAPMAKL V----CKPFY EHSEPKNDAQ GNGDKVPMNV PCVESMDCQY ENKHAPMAKL V----CKPFY EHSEPKNEAQ GNGEK-PMNV PCVESIECQY ENKHATMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL V----YKPFY ERSEPKNEAQ GNGEK-QGNV PCVESVECQY ENKHAIMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMECQY ENKHAIMAKL V----CKPFY DRSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL V----CKPFY ERSEPKNEAQ GNGEK-QGNV PCVESMDCQY ENKHAIMAKL V----VKPFY ARSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL V----VKPFY DRSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL V----IKPFY DRSEPKNEAQ GNGEK-QWNV PCVESMDCQY ENKHATMAKL V----VKPFY ERSEPKNEAQ GNGEK-QWNV PCVQSVDCQY ENKHATMAKL V----VKPFY ERSEPKNEAQ GNGEK-QWNV PCVQSVDCQY ENKHATMAKL V----VKPFY EVAESKNEAQ GNGEK-PVNV PCVESPDCQY ESKHARVNKL ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK ---------- --CGIKDEAQ ADGQR-PATL IVVLSKDNKG ITEHGAVLKK GTFDAAKPFR QICGTKDEAQ PDGRR-PATL LVVLSKDNQG ITEHGAVLKH GTFDAAKPFR QICGVKDEAQ PDGQR-PATL LVVLSKDNVG ITEHGAVLKH VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK VHG-ALYLIG MKTVVKPLLG DWEGCKCRS- --KLYVSQLD H----KTLSK VHG-ALYLIG MKTVAKPVLG DWEGCKCRSD LIKLYVSQLD H----KTLSK VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLG IHTVMKPHLG NVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLG IHTVMKPHLG NVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLG LRTVMKPHLG DVDGCRCRSD LNNLILSQLD D----KTLSK VHH-ALYLLA 
LHTMMKPRLG DIDGCRCRSE LNKLALSQLD D----KTLSK VHV-ALYLVG LKTVVKPKLG DWDGIRCRSE LNKLYLSQLD D----KTLSK VHV-ALYLVG LKTVVKPKLG DWDGVRCRSD LNKLYLSQLD D----KTLSK VHG-ALYLIA LKTVVKPKLG VWQGCRCRSQ LSKLVLSQLD E----KTLSK VHG-ALYLIA LKTVVKPKLG VWQGCRCRSQ LSKLVLSQLD E----KTLSK VHA-ALYLIG LKTVVKPELG VWEGCRCRSQ LSKLALSQLD E----KTLSK VHG-ALYLIG LQTVVKPKLG VWEGVQCRSQ LSKLILSQLD D----KTLSK VHG-ALYLIG LQNVVKPKLG VWEGVQCRSQ LSKLILSQLD D----KTLSK VHG-ALYLIG LQTVVKPKLG VWEGCQCRSQ LSKLILSQLD D----KTLSK MHT-ALYLMP LATVHKPEMG TVRGERCRAI L-KLLMMQLD ------TLSR MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LATVHKPQMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LSTVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHS-ALYLMP LATVHKPEMG TVKGERCRTI L-KLLMVQLD E----KTLSQ MHT-ALYLMP LPTVHKPKMG TVKGERCRAM L-KLNMMQLD E----KTLSQ MHT-SLYLMH IETCHKPVMG TVKGNRCRAI L-KLSMIQLD Q----KTLSQ MHT-SLYLMH IETCHKPVMG TVKGNRCRAI L-KLSMIQLD Q----KTLSQ VHT-SLYLMH IDTCHKPVMG TIKGNRCRAI L-KLTMIQLD Q----KTLSQ VHN-SLYLMH IDTCHKPAMG TVKGNRCRAI L-KLTMIQLD Q----KTLSQ VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD Q----KTLSQ VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD V----KTLSQ VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD V----KTLSQ VHT-SLYLMH IDTCHKPVMG TVKGNRCRAI L-KLTMIQLD V----KTLSQ MHT-SLYLMH LGTCHKPVMG TVKGDRCRAI L-KLSMIQLD Q----KTLSQ MHT-SLYLMH LKTCHKPVMG TVKGDRCRAI L-KLKMIQLD Q----KTLSQ MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLPMIQLD Q----KTLSH MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLPMIQLD Q----KTLSH MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLPMIQLD Q----KTLSH MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLSMIQLD E----KTLSQ MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLNMIQLD Q----KTLSQ MHD-SLYLMH LETCHKPIMG TVKGDRCRAI L-KLNMIQLD Q----KTLSQ MHD-SLYLPH LETCHKPIMG TVKGGRCRAS L-KLKMIQLD Q----KTLSQ MHD-SLYLPH LETCHKPIMG TVKGGRCRAS L-KLNMIQLD Q----KTLSQ MHD-SLYLMH IETCHKPIMG 
TVKGDRCRAT L-KLKMIQLD Q----KTLSQ MHD-SLYLMH LETCHKPIMG TVKGDRCRAT L-KLKMIQLD Q----KTLSQ MHD-SLYLMH LETCHKPIMG TVKGDRCRAT L-KLKMIQLD Q----KTLSQ MHD-SLYLWH LETCVKPIMG TVKGDRCRAT L-KLKMIVLD P----KTLSQ MHQ-SLYLMH INIMSKPAMG EWVGNRCRNE LTALRIIQLD VGVSGKTLGQ MHQ-SLYLMH INVMSKPAMG EWVGNRCRND LTALRIIQLD VGHSGKTLGQ MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ MHQ-SLYLMH VNIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ MHQ-SLYLMH INIMSKPAMG EWVGVRCRNE LTALRIVQLD VGHSGKTLGQ MHQ-SLYLMH MNIMSKPAMG EWVGNRCRNE LTALHVVQLD VGFSGKTLGQ MHQ-SLYLMH MNIMSKPAMG EWVGNRCRNE LTALHIVQLD VGFSGKTLGQ MHH-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD VGSSGKTLGQ MHQ-SLYLMR VNTMPKPVMG DWDGNRCRAE LTALQTVQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD VGSSEKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALQTVQLD LGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGHRCRAE LTELQTVQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGHRCRAE LTELQTVQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTGQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPVMG DWVGNRCRAE LTALKTVQLD VGSNGKTLGK MHQ-SLYLMR VNTMSKPVLG DWVGNRCRAE LTALKTVQLD IGNSGKTLGQ MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKTVQLD VGSSGKTLGQ MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKTVQLD VGSSGKTLGQ MHQ-SLYLMR ANTMSKPVMG DWVGSRCRAE LTALKIVQLD VGSSGKTLGQ MHQ-SLYLMR VNTMSKPIMG DWVGNRCRAD LTALKTVQLD VGSSGKTLGQ MHH-SLYLMR MHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ MHQ-SLYLMR MHTMSKPIMG HVNGNRCRAD LTALKMLQLD IGFSAKTLCQ MHQ-SLYLMR IHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ MHQ-SLYLMR MHTMSKPIMG DVNGNRCRAD LTALKMLQLD IGFSAKTLGQ MHQDSLYLMR MHTMSKPLMG DVNGNRCRAD LTSLKMLQLD IGFCAKTLGQ MHQ-SLYLMR MHTMSKPLMG DVNGNRCRAD LTSLKMLQLD IGFCAKTLGQ MHQ-SLYLMR MYTMSKPIMG DVNGNRCRAE LTSLKMLQLD IGFSAKTLGQ MHD-SLYLMR MHTMSKPVMG DVNGHRCRAD LTALKMLQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKVVQLD IGFSSKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAK 
LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG YVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR MQTMAKPPMG DVDGHRCRAG LTALKIVQLD IGFSQKTLGQ MHP-PLYLVK MQTMSKPPMG DVDGHRCRAG LTALKIVQLD IGFSSKTLGQ MHP-PLYLVK MQTMSKPPMG DVDGHRCRAG LTALQIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGVRCRAG LTALKIVQLD IGFSAKTLGQ MHP-PLYLMR METMSKPPMG DVDGVRCRAG LTALKIVQLD IGFSAKTLGQ MHP-SLYLMC MEAMNKPIMG DWDGNRCRSP LCLLKVIQLD MGVSGKTLGQ IHP------- ---LCKRCME NGRCLRYKND LTGDQLVQLA D----NTCAW IHP------- ---LCKRCME NGRCLRYKND LAGDQLVQLA D----NTCAW IHP------- ---LCKRCME NGRCLRYKND LAGDQLVQLA D----NTCLW VHP------- ---LCKKDCN NNRCLRCKNV LAGNQLIQLS D----VTCAW VHP------- ---LCKKDCN NNRCLRCKNV LAGNQLVQLA D----ITCAW -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA -NLGLY--MQ KRTWLATAPL Q------IGC MFMLVGRKKN SEE-NYNKAA -NMGLY--MR NRTWLATSPL Q------IGC IFMLVGRKKN SEE-NYNKAA -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SAN-NYNRAG -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNQAG -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNRAG -NCFTY--LG NMTWLATSPL H------IGC MIIFVGRKQQ SGN-NYNRAG -NCFQY--LG NMTWLATSPL H------VGC MIIF------ ---------- -NCFIY--LG NMTWLATSPL Q------IGW LVLFDGRKRQ SAE-DYNRAA -NCFRY--LG NMTWLATSPL Q------NGC VVIFAGRKRQ SAE-DYNRAA -NCSIY--LG NMTWLATSPL Q------IGC VILFGGRKRQ SHGLNYNRAA -NCSIY--LG NMTWLATSPL Q------IGC VILFGGRKRQ SHGLNYNRAA -NCSIY--LG NMTWLATSPL Q------IGC IILFGGRKRQ SHGLNYNRAA -NCSIY--LG NMTWLATSPL Q------IGC IIMFDGRKRQ SHGLQYNRAA -NCSIY--LG NMTWLATSPL Q------IGC IILFDGRKRQ SHGLQYNRAA -NCNIY--LG NMTWLATFPL Q------IGC IILFDGRKRQ 
SYGLQYNRAA -NQLPK--LC QGTWLDASPL Q------IGV QVMLVGKKGG SKK-EYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV KVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIPD--LC QRTWLDASPL Q------IGV QVVLVGKKRG SKK-DYELAA -NQIIK--LC QRTWLDASPL Q------IGV NCVLPGKKGG SNK-DYELAA -NQRAK--LC QRTWLDTSPL Q------MGM TLVLVGKKVG SKK-DYEVAA -NQRAK--LC QRTWLDTSPL Q------MGM TLVLVGKKVG SKK-DYEVAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLVGKKVG SKK-DYEIAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA -NQRAR--LC QRTWLDTSPL Q------MGM TIVLMGKKVG SKK-DYEIAA -NVRAR--LC QRTWLDTSPL Q------MGM VAVLLGKKLG SKK-DYEIAA -NQAAR--LC KRTWLDTSPL Q------MGM NVVLIGQKLG SKH-DYEIAA -NQPRR--LC QRTWLDASPL Q------VGM NIMLMGKKAP SKK-DYEIAA -NQPRR--LC QRTWLDASPL Q------VGM NIMLKGKKAP SKK-DYEIAA -NQPRR--LC QRTWLDASPL Q------VGM NIMLMGKKAP SKK-NYEIAA -NQRPG--LC QRTWLDANPL Q------MGM NTMLVGKKTP SKK-DYEIAA -NQRPG--LC QRTWLDASPL Q------MGM HIMLVGKKTP SKK-DYEIAA -NQRPG--LC QRTWLDASPL Q------MGM HIMLVGKKTP SKK-DYEIAA -NQQRR--LC QRTWLDSSPL Q------MGM KIMLQGKKTP SKK-DYEIAA -NQQRR--LC QRTWLDASPL Q------MGM KIMLQGKKIP SKK-DYEIAA -NQKRR--LC QRTWLDASPL Q------MGM KMMLQGKKTP SKK-DYEIAA -NQKKR--LC QCTWLDACPL Q------MGM KIMLQGKKTP SKK-DYEIAA -NQKKR--LC QCTWLDACPL Q------LGM KIMLQGKKTP SKK-DYEIAA -NQKRR--LC QRTWLDASPL Q------VGM KIMLQGKKTP SKK-DYEIAA -NIGVSKLLN DRTWLATSPL E------IGC GVMSVEKKEA SPK-EFEVAA -NIGVSKLLN DRTWLATSPL E------IGC LVMSVEKKEA SGK-EFEVAA -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA -NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA 
-NIGVSKLLN DRTWLATSPL E------IGC GVISVEKKEA SPK-EFEVAA -NIGISELLN DRTWLATSPL E------IGC GVMAVEKKEA SPK-EFEVAA -NIGISELLN DRTWLATSPL E------IGC GVMAVEKKEA SPK-EFEVAA -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA -NIVTSKRLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-EVEVAA -NIVTSKKLG DRTWLAASPL Q------IGC GIMVQEKKIA SPQ-EVEVAA -NIVTSKKLG DRTWLAASPL Q------IGC GVMVQEKKIA SPQ-QVEVAA -NIVTSKKLG DRTWLAAPPL Q------IGC DVMVQEKKIA SPQ-QVEVAA -NIMTSKKLG DRTWLAANPL Q------IGR GVMVWEKKVA SPQ-EVEVAA -NIMVSKKLG DRTWLAASPL Q------IGC GVMVWEKKVP SPQ-EVEVAA -NIMVSKKLG DRTWLAASPL Q------IGC GVMVWEKKVP SPQ-EVEVAA -NIMTPKKLG DRTWLAASPL Q------IGC GVMVQEKKVA SPQ-EVE-AA -NIMTPKRLG ERTWLAASPL Q------IGC GVVVQEKKVA SPQ-EVEVAA -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA -NIMTPKKLG DRTWLAASPL Q------IGC DVMVQEKKVA SPQ-EVEVAA -NIMTPKNLG DKTWLAASPL Q------IGC GIMVQEKKVA SPQ-EVEVAA -NVVVPHLLG HRTWLATSPL Q------IGC GVMIFGNKIG SAN-EFEAAA -NIVVAHLLG HRTWLATSPL Q------IGC MVMIFGNKIG SAN-EFEAAA -NIVVPHCLG HRTWLATSPL Q------IGC MVMIFGNKIG SEN-EFEAAA -NIIVPHCLG HRTWLATSPL Q------IGC MVMIVGNKIG SEN-EFEAAA -NIVVPKLLG HRTWLATAPL Q------IGC GMMIFGNKIG STQ-EFEAAA -NIVVPKLLG HRTWLATAPL Q------IGC GMMIFGNKIG STQ-EFEAAA -NIVPPKLLG HRTWLATSPL Q------IGC GVMIFGNKIG STQ-EFESAA -NIVAPKLLG VRTWLATSPL Q------IGC GVMIFGNKIG STQ-EFEAAA LNIIDCKLLG HRTWLATSPL Q------IGV DVMVMGNKIG SPS-EFEVAA LNIIDCKLLG RHTWLATSPL Q------IGC DMMVMGNKIG SPS-EFEVAA LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPS-EFEVAA LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPS-EFEVAA LNIIDCKLLG HRTWLATSPL Q------IGC DVMVIGNKIG SPS-EFEIAA LNIIDCKLLG HRTWLATSPL Q------IGC DIMVMGNKIG SPS-EFEVAA LNIIDCKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPA-EFEVAA LNIIDCKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPA-GFEVAA -NIIDSKHLG HRTWLATSPL Q------IGC SAMVMGNKIG SPQ-EVELAA -NIIHSKLLG HRTWLATSPL Q------IGC DVMVMGNKIG SPQ-EVELAA -NIIHSKLLG 
HRTWLATSPL Q------IGC DVVVMGNKIG SPQ-EVELAA -NIIDSKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPQ-EFELAA -NIIDSKLLG HRTWLATSPL Q------IGC DVMVVGNKIG SPQ-EFELAA -NIVMAKLLG ERTWLATSPL Q------IGC DVVAVGKKPE SPQ-EFECAA -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V -NLFEC--LR DSKSLFGSPL FIKVDEDRGF TAP------- SK-------V -NLFEC--LG DSESLFGSPL AIKVDEDRGF TAP------- SK-------V -NLFEC--LG DSESLFGSPL AIKVDEDRGF TAP------- SK-------V DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRG------N NVMSESLGDD DPTVIWFYTQ PIVYV-ADVF GCTKRKNPQE MRG------N NVMSESLGDD DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRG------N NVMSESLGDD DPTVIWFYTQ PIVYV-ADVF GCTKGKNPQE MRK------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD DPTTICFCRK PIVMH-ADVF GCARAKDPQE MRM------N NVISESLGDD ---------- ---------- ---------- --V------N NVLSQSLGDD DPTTISFCRK PIVQH-ADVF GCDRDKDPQE MRT------N NVISESLGDD DPTTIAFCRK PIVQH-ADVF GCDKDKDPQE MRM------N NVISESLGDD DPTTICFYKK PIVVQGADVF GCARGKDPQE MKA------N NVISGSLGDD DPTTICFYKK PIVVQGADVF GCARGKDPQE MKA------N NVISGSLGDD DPTMICFYKK PIVVQGADIF GCARGKDPQE MKS------N NVISGSLGDD DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD DPTTICFYKK PIVVQGADIF GCARGKDPQE MKA------N NVTSGSLGDD DQVIIYFYQA PIIYVKADVF SGTVAKKAQA MR-------K STGSQSIGDD DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD DPVIIYFYQA PIIYHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD DPVIIYFYQA PIIYHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD DPVIIYFYQA PIIHHKADVF AGTVAKKSQA MRS------R SIGSQSVGDD DPVIIYFYQA PIIHHKADVF AGTVAKKAQA MRS------R SIGSPSVGDD DPVIIYFYQA PIIHHKADVF AGTVAKKAQA MRS------R SIGSPSVGDD DPVIIYFYQA PIIHHKADVF 
SGTVAKKAQA MRQ------Q STGSQSVGDD DPVIMYFYDA PMIMRPTDVF EGTNNKKAQA MRS------R STASQSIGDD DPVIMYFYDA PMIMRPTDVF EGTNNKKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIMYFYQA PMIMRPTDVF EGTNNRKAQA MRS------R STASQSIGDD DPVIKYFYQA PIIMRRTDVF EGTDDQKAQA MRV------R STASQSIGDD DPVIMYFYQA PIIMRRTDVF EGTDDQKAQA MRL------R STASRSIGDD DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DPVIMYFYQA PIIMRRTDVF EGTHNKKAQA MRV------R STASQSIGDD DPVIMYFYQA PIIMRRTDVF EGTNNKKSQA MRV------Q STSSQSIGDD DPVIMYFYQA PIIMQRTDVF EGTNNKKSQA MRV------H STSSQSIGDD DPVIMYFYQA PIIMQRTDVF EGTNNKKSQA MRV------H STSSQSIGDD DPAIMHFYRA PIIMRRTDVF EGTNNKKAQA MRI------R STASQSIGDD DPAIMYFYQA TIIMRRTDVF EGTNNKKAQA MRM------R STASQSIGDD DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRV------R STASQSIGDD DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRI------R STASQSIGDD DPVIMYFYQS PIIMRRTDVF EGTNNKKAQA MRV------R STASQSIGDD DPVIMYFYQS PIIMRHTDVF EGTNNKKAQA MRL------R STASQSIGDD DPTVIYFYRN LIIQKITDVL SAVRMESPQE MRT------Q DVNSSSLGDN DPTVIYFYRN LIIAKITDVL SAVRMESPQE MRE------Q DVNSSSLGDN DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN DPTVIYFYRS LIIVHITDVL SAVRMDSPQE MRN------Q DVNSPSLGDN DPTVIYFYRN LIIQHITDVL SAVRMDSPQE MRV------Q DVNSPSLGDN DPTVIYFYRN LIIQHITDVL SAVRMDSPQE MRV------Q DVNSPSLGDN D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRTPVLVCIQ KVDSVSLGDD D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRTPVLVCIQ KVDSVSLGDD D---IYFYRN MVVQRHTDVV AAVRMQSPQE MRNPVLVCIQ KVDSASLGDD D---IYFYRN MVVQRHTDVV AAVRMQSPQE MRNPVLVCIQ KVDSISLGDD D---IYFYRN MVVQRLTDVV AAVRMQSPQE MRCRKLVCIQ KVDSPSLGDD D---IYFYRN MVVQRLTDVV AAVRMQSPQE 
MRTPKLVCIQ KVDSPSLGDD D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD D---IYFYRN MVIECLTDVV RAVRMQSPQE MRAPVLVCIQ KVDSPSLGDD D---IFFYRN MVIQRLTDVV AAVRMQSPQE MRPPVLVCIQ YVDSPSLGDD D---IYFYRN MVIQTLTDVV AAVRMQSPQE MRPPVLVCIQ DVDSVSLGDD D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRLPVLVCIQ DVDSPSLGDD D---IYFYRN MVIQRLTDVV AAVRMQSPQE MRPPVLVCIQ DVDSPSLGDD DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRE------Q DVDSHSLGDD DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DPTVIYFYRN RIVRRLADVI STVRMNSPQE MRN------Q DVDSHSLGDD DPTVIYFYRN RIVRRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DPTVIYFYRN RIVRRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DPTVIYFYRN RIVKRMADVI STVRMKSPQE MRG------Q DVDSHSLGDD DPTVIYFYRN RIIRRVADVI STVRMKSPQE MRE------Q DVDSHSLGDD DPTIIWFYRN CIVHKLADTV STAKMKSPQE MRH------Q DVDSPSLGDD DPTIIWFYRD CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRH------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADTV STAKMKAPQE MRV------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADVV STAKMKSPQE MRV------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADLV STAKMKSPQE MRV------Q DVDSPSLGDD DPTIIWFYRN CIVHKLADLV STAKMKSPQE MRV------Q DVDSPSLGDD DPTVIYFYRN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DPSVIYFYQN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DPSVIYFYQN CIVQKMADVV STVKMKSPQE MRV------Q DVDSASLGDD DPTVIYFYRN CIVQKLADVV STVKMKSPQE MRV------Q DVDSPSLGDD DPTVIYFYRN CIVQKLADVV STVKMKSPQE MRV------Q DVDSPSLGDD DPTVIYFYKN LIIQQAADYV SAVQVKSPQE MRY------Q DVNSPSDGDE EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED EPKCAQFYSK SCTHC----- ---------- -TM------C SVGSHASEED EPKCAQFYSK SCTHH----- ---------- -MM------C SVGSNASEED EPKCQQFYSK SCTHC----- ---------- -VM------C 
SVGSNASEED NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF NLQD--MSGV PVTVCTSSVM VRKDMQD-SV DKRGCTWNAK E-DHLCPSSF NLQD--MPGM PVTVCTSSVM VRKDMHQ-SV DKRGYTWHAK E-DHLCPDSF SLQQ--MPAV PVDVCVMSVM VNKSMPQ-SH DKRGYTWQAK Q-DHLFPVNV SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNC SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNC SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF SLQQ--MPAV PVTVCVMSHM VKKSMPQ-SH DKRGYTWQAK Q-DHLFPVNF SLQQ--MHAM PVTVVVVSHM VKKSMPQ-SH DKRGYTWQAK DFDQLIPVSF FLQQ--MPAV PVSVCVYSHM VKKFVPQ-SH EKRGYTWKAK E-DHLVPISY FLQQ--MPAV PVSVCVHSHM VKKFVPQ-SH EKRGVTWKAK E-DHLVPISL RLQQ--MPAM PVTICVSSYM VKKSVIQ-SH QKRGYTWRAK E-DHLIPVSF RLQQ--MPAM PVTICVSSYM VKKSVPQ-SH QKRGYTWRAK E-DHLIPVSF RLQH--MPAM PVTICVFSYM VKKAVPQ-SH QKRGYTWRAK E-DHLIPVSF RLQQ--MPAM PVTICVFSYM VKKSVPQ-SH VKRGYTWRAK E-DHLIPVSF RLQQ--MPAM PVTICVFSYM VKKLVPQ-SH QKRGYTWRAK E-DHLIPVSF RLQQ--MPAM PVTICVFSYM VKKSVPQ-SH QKRGYTWRAK E-DHLIPVSF GMQS--MPLM QNAVCVWSKM VRKVQPD-GQ DKREQTWMAK D-DTLCPPSM GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DTLCPPCE GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWMAK D-DALCPPCK GMQN--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWVAK D-DTLCPPCK GMQK--MPLM QNAVCVWSQM VRKVQPD-GQ DKREQTWVAK D-DTLCPPCK GTQV--MPLM QNLVCVWSKM VRKCMID-GQ EKREQTWMAK D-DKLCPPSQ DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK D-DTLCPQSR DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK D-DTLCPQSR DMLN--MPLT QNAMCVESEM VRKSQPD-GQ DKRGYTWVAK E-DDLCPQSG DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSE DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG DMLN--MPLT QNAMCVESEM VRKCQPD-GQ DKRGYTWVAK E-DTLCPQSG 
DMLI--MPLV QNAMCVKSEM VRKCQPD-GP DKRGYTWMAK D-DTLCPVSA DMLI--MPLV QNAMCVNPEM VRKCQPD-GQ DKRGYTWMAK N-DTLCPVSA DMLK--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA DMLK--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA DMLK--MPLR QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGCTWMAK N-DTLCPQSG DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGYTWMAK H-DTLCPQSG DMLN--MPLV QNAMCVSSEM VRKCQPD-GQ NKRGYTWMAK H-DTLCPQSG DMLN--MPLH QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSP DMLN--MPLH QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLYPQSA EMLN--MPLV QNAMCVPSEM VRKCQPD-GQ DKRGYTWMAK D-DPLCPQSA EMLN--MPQV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA EMLN--MPLV QNAMCVSSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQSA EMLN--MPLI QNAMCVQSEM VRKCQPD-GQ DKRGYTWMAK D-DTLCPQLH NGQA--MNIV PYWVCVASGV VRKTHKD-SV DKRGQTWTAK S-DFLCPLAV NGQA--MNIV PYWVCVGSGV VRKTHKD-SV DKRGQTWTAK S-DFLCPLAM NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV NGQA--MNIV PYWVCVASGV VRKMHKD-SV EKRGQTWEAK S-DFLCPLAV NGQA--MNIV PYWVCVVSGV VRKTHKD-SV EKRGQTWTAK S-DFLCPIAV NGQA--MNIV PYWVCVVSGV VRKTHKD-SV EKRGQTWTAK S-DFLCPIAV DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPAAV DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV DPQM--MKIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV DPQM--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV DPQC--MNIV PYWVCVCSGV VKKAEPD-SV DKRGKTWVAK S-DFLCPPAV VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM VPQM--MNIV PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAM TPQM--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK A-DFLCPPAV TPQM--MNIA PYWVCVCSGV VKKTQPD-SV DKRGKTWVAK S-DFLCPPAV TPQK--MNIA PYWVCVCSGV VKKAQLD-SV EKRGKTWVAK S-DFLCPPAV TPQK--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAV TPQK--MNIA PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAV TPQM--MNIA 
PYWVCVCSGV VKKAQPD-SV DKRGKTWVAK S-DFLCPPAI DNQA--MVIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK V-DFLCPPAF DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK M-DFLCPPAF DNQA--MNIL PYVVCVCSGV VRKDHPDTSI HKRGRTWLAK V-DFLCPPAF DNQA--MNIL PYVVCVCSGV VRKEHPDTSI SKRGRTWLAK I-DFLCPPAF DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF DNQA--MNIL PYWVCVCSGV VRKEHPDTSI NKRGRTWLAK I-DFLFPPAF DNQA--MTIL PHWVCVVSGV VRKEHPDTSI NKRGSTWLAK V-DFLCPPAF DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQQE--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQQA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK N-DFLCPPAF DQEA--MDIL PYWVCVMSGV VRKEQPD-SV NKRGRTWLAK K-DFLVPPAF DQEA--MDIL PYWVCVTSGV VRKEQPD-SV NKRGRTWLAK K-DFLVPPAF DQQP--MNII PYWVCVTSGV VHKEQPD-SV NKRGRTWTAK N-DFLCPDAF DQQP--MNII PYWVCVTSGV VRKEQPD-SV NKRGHTWTAK N-DFLCPPAY DQQP--MNII PYWVCVTSGV VRKEQPD-SV NKRGRTWSAK N-DFLCPPAF NQQS--MNII PYWVCVHSGV VQKEQPD-SV HKRGRTWTAK N-DFLCPPAF NQQS--MNII PYWVCVHSGV VQKEQPD-SV HKRGRTWTAK N-DFLCPPAF NGQS--MHIG PYWVCVSSEV VKKSQPD-SV DKRGRTWVAK N-EFLCPPDH ALDLYTHKPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ALDLYTHRPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ALDLYTHKPV PDAQCFVSRV ARNIPEH-SP CK-------- ---------- ALELYTHKPV PDPQCFVSIV VRNIPEH-SP CK-------- ---------- ALELYTHKPV PDPQCFVSIV VRNIPEH-SP CK-------- ---------- CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- CKGER---ED EPGGVTQ--- ---------- ----RCIENI AKLLYIKDV- CKGEK---EE EPGGVRR--- ---------- ----RIVENI VKLLYIKDV- VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGADQ--- 
---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGADQ--- ---------- ----RTVHDM VRLLYSKDV- VKGEQ---EE EPEGPNN--- ---------- ----RVVHNI VKLLYTKDV- CKGEH---EE EPEGAQY--- ---------- ----RVVQNI VKLLYTKDV- CKGEH---EE EPEGAQY--- ---------- ----RCVQNI VKLLYTKDV- CKGEL---DE EPDAAQQ--- ---------- ----RIVQNI VKLLYTKDV- CKGEL---DE EPDAAQQ--- ---------- ----RIVQNI VKLLYTKDV- CKGEL---DE EPDGAQQ--- ---------- ----RVVQNI VKLLYTKDV- CKGEL---EE EPEGAQH--- ---------- ----RMVQNI VKLLYTKDV- CKGEL---EE EPEGAKQ--- ---------- ----RMVQNI VKLLYTKDV- CKGEL---EE EPEGAQQ--- ---------- ----RIVQNI VKLLYTKDI- KRGEK---TA EPTQWMG--- ---------- ----TVTVNK IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- DQGEK---AA EPTQWVG--- ---------- ----TCTSNV IKLLYCKDC- EQGEK---AA EPTQWED--- ---------- ----ICTANV IKLLYCKDC- DQGEK---HH EPHHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---HH EPHHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----QFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- DQGEK---YA EPQHIRN--- ---------- ----KFSENV IKLLYCKDC- GQGEK---YA EPHQIKN--- ---------- ----KFSVNV IKLLYCKDC- GQGEC---YA EPHQTCN--- ---------- ----KFRVNV IKLLYCKDC- NQGEK---YA EPHVIGD--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---YA EPHVIGD--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---YA EPVVIGD--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---FA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- NQGEK---YA EPHQIRN--- ---------- 
----KISVNV IKLLYCKDC- NQGEK---YA EPHQIRN--- ---------- ----KISVNV IKLLYCKDC- NVGEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- NVGEK---YA EPHQIRN--- ---------- ----KVSTNV IKLLYCKDC- NQGEK---YA EPHQIRN--- ---------- ----KVSTNV IKLLYCKDC- NQAEK---YA EPHQIRN--- ---------- ----KVSVNV IKLLYCKDC- DQGEP---GA EQKPAVGEEN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- DQGEI---GA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- DQGEP---AA EQRPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- DQGEP---AA EQRPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- DQGEP---AA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- DQGEP---AA EQKPAVGEKN PICKPWQFVI MNTTTFAKNI FRLLYVKDV- NQGEP---GA EQKPAVGA-N PIVKPWQFVI MNTTTFAKNI FRLLYVKDV- NQGEP---GA EQKPAVGA-D PIVKPWQFVI MNTTTFAKNI FRLLYVKDV- NEGEH---GA EQRP-FGTQN PVCKPWRFVI MNTQTFAKNI IRLLYIKDI- NEGEH---GA EQRPVFGTHN PVCKPWRFVI MHTQTFSKNI IRLLYIKDI- NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNTQTFSKNI IRLLYIKDI- NEGEH---GA EQRPVFGTQN PVCRPWRFVI MNMQTFSKNI IRLLYIKDI- NEGEH---GA EQRPVFGGQN PACKPWHFVI MNRQTFAKNM IRLLYIKDI- NEGEH---GA EQRPVFGGQN PACQPWHFVI MNTQTFAKNM IRLLYIKDI- NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNTETFAKNI IRLLYVKDV- NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNVETFAKNI IRLLYVKDV- NEGEH---GA EQRPVFGTQN PVCKPWRFVI MNVETFAKNI IRLLYVKDV- SEGEH---GA EQRPVFGMQN PACKPWRFVI MNTQTFAKNI IRLLYIKDV- SEGEH---GA EQRPVFGMRN PICKPWRFVI MNTQTFAKNI IRLLYVKDV- SEGEH---GP EQRPVFGVQN PVCKPWRFVI MNTQTFAANI IRLLYVKDV- SEGEH---GP EQRPVFGVQN PVCKPWRFVI MNTQT-AKNI IRLLYVKDV- SEGEH---GP EQRPVFGMQN PVCKPWRFVI MNTQTFAKNI IRLLYVKDV- SEGEH---GA EQRPVFGMQN PVCRPWRFVI MNTQTFAKNV IRLLYVKDV- NQGEH---RA EQKPASKFEH PVCGPWEFVI VNYQTSAKDI ILLLYIKDV- NQGEH---RA EQKPALKFKH PVCGPWEFVI VNYQTSAKDI ILLLYIKDV- NQGEH---RA EQKPAFKFKH PVCGPWEFVI VNYQTEAKDI ILLLYIKDV- NQGEH---RA EQKPAFKFKH PCCGPWEFVI VNYQTSAKDI ILLLYIKDV- NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTVAKDI ILLLYIKDV- NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTVAKDI ILLLYIKDV- NQGEH---HA EQKPVFGFKH PHCGPWEFVI CNYQTMAKDI ILLLYIKDV- NQGEH---HA EQKPVFGFKH PHVGPWEFVI CNYQTSAHDI 
ILLLYIKDV- NQGEH---ER EQRPVYGFKH PGCRPWQFVI ANYQTSAKNI IMLLYVKDV- NQGEH---EA EQRPVYGFVH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- NQGEH---GA EQRPVYGFQH PRCRPWQFVI ANYQTFAKNI IMLLYVKDV- KQGEH---EA EQRPVYGFQH PRCRPWQFVI ANYQSSAKNI IMLLYVKDV- NQGEH---EA EQRPVYGFQH PRCRPWQFVI GNYQTSAKNI IMLLYVKDV- EQGEH---EA EQCPVHGFKH PRCRPWQFVI ADYQTSAKNI IMLLYVKDV- NQGEH---EA EQKPVYGFDH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- NQGEH---EA EQKPVYGFDH PRCRPWQFVI ANYQTSAKNI IMLLYVKDV- NQGEH---GA EQKPTHGFEH PRCRPWQFVI PVYQTGAKNI ILLLYIKDV- NQGEH---GA EQKPVHGFEH PRCRPWQFVI SVYRTGAKNI NLLLYIKDV- NQGEH---GA EQKPVHEFQH PRCRPWQFVI SVYRTGAKNI NLLLYIKDV- DQGEH---GA EQKPVHGFEH PRCRPWQFVI ANYQTGAKNI ILLLYVKDV- DQGEH---GA EQKPVHGFEH PRCRPWQFVI ANYQTGAKNI ILLLYVKDV- MQGEHSIEGA EQKPFFGFAG PFPQPWQFVI VNPQTWAHNI IRLLYCKDV- ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR ---------- -P---CS--- ---------- ----VEEVCY CKVCDSKDVR ---------- -P---CS--- ---------- ----VEEVAY CKNCDSKDVR ---------- -P---CS--- ---------- ----VEEVCY CKNCDSKDVR LCNEVLNGLQ WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- LCNEVLNGLQ WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- LCNDVLNLLV WQLCWSV-GD WESLIPQACW DAKKDLAVCA WKMELVPGL- LCNQVLNLLQ WQLCWSV-GD WESLVPQACW GAKKDLAVCA WKMELVPGL- DCNMVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- DCNIVLSLLV WQLC--H-GD WEPLLPQACE GARRDLAVCA WKRELVPGL- DCNIVLSLLV WQLC--H-GD WEPLLPQACE GARRDLAVCA WKRELVPGL- DCNIDLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- DCNIVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- DCNIVLSLLV WQLC--H-GD WEPLLPQACE SARRDLAVCA WKRELVPGL- DCNTVLSLLI WQLC--H-GD WEQLVPQACA GARSDLAVCA WKRELVPGL- DCNVVLSLLV WQLC--H-GD WEPLIPQACQ TAKKDLAVCA WKRELVPGL- DCNVVLSLLV WQLC--H-GD WEPLVPQACQ TAKKDLAVCA WKRELVPGL- SCNVVLSMLI WQIC--H-GD WEPQIPKACS NARKDLAVCA WKRELVPGL- SCNVVLSMLI WQIC--H-GD WEPQIPKACS NARKDLAVCA WKRELVPGL- SCNKVLSMLI WQIC--H-GD WEPQIPKACN AARKDLAVCA WKRELVPGL- SCNVVLSMLI WQIC--H-GD WEPQIPKACD AARKDLAVCA WKRELVPGL- 
SCNVVLSMLI WQIC--H-GD WEPQIPKACD AARKDLAVCA WKRELVPGL- SCNVVLSMLI WQIC--H-GD WEPQLPKACD AARKDLAVCA WKRELVPGL- SCNEVMKILS WWLCNSV-GD WQTLMSQACI TADPNPPVCV WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACR SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQSLMNDACR SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACS SANPNYPVCI WKRELVPGL- SCNEVLKILA WWLCDSV-GD WQTLMNDACS SANPNYPVCI WKRELVPGL- SCNEVLRVLS WWLCDSV-GD WRTLMSDACA LANPNPPVCV WKRELVPGL- SCNKVLKILV WQLCESV-DD WQTLSSDACM HAEQNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLSSDACM HAEQNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNTPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEHNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDACV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WVTLASDACL SAEPQPPVCV WQRELVPQL- SCNKVLKILK WQLCESV-DD WQTLASDACI LAKPQPPVCV WQRELVPQL- SCNQVLKILV WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- SCNQVLKILI WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- SCNQVLKILV WQLCESV-DD WQTLTSDPCM SAQVNPPVCV WQRELVPQL- SCNKMLKILV WQLCESV-DD WQTLASDPCV SAEPNSPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDPCM SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLASDPCM SAEPNPPVCV WQRELVPQL- SCNQVLKILV WQLCESV-DD WQTLTSDPCV PAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLTSDPCV PAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCDSVQDD WQPLTSDPCV NAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCDSV-DD WQTLTSDPCV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCDSV-DD WQTLTSDPCV SAEPNPPVCV WQRELVPQL- SCNKVLKILV WQLCESV-DD WQTLTSDPCI SAEPNPPVCV WQRELVPQL- SCNGVLPLLK WQLCDSH-GD WQSLYADSCP IA-VNAAVCG WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLYADSCP IA-INAAVCG WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- SCNGVLPLLK 
WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLFPDSCP FA-LNAAVCG WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLFADSCP TA-VNAAVCA WKRELVPGL- SCNGVLPLLK WQLCDSH-GD WQSLFADSCP IA-VNAAVCA WKRELVPGL- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP NA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP NA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP PA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSLMADSCP PA-VNIAVCC WKRELMPGL- SCNEVLQLLD WQLCDSH-GD WQSLIADSCP NA-SNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSLIADSCP NA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSLVADSCP NA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSMVADSCH NA-VNIAVCC WKRELMPGL- SCNQVLQLLD WQLCDSH-GD WQSMVADSCH NA-VNIAVCC WKRELMPGL- SCNAVLQLLD WQLCYSH-GD WQSLIADSCP TA-ANIAVCC WKRELMPGL- SCNAVLQLLD WQLCYSH-GD WQSLIADSCS TA-ANIAVCC WKRELMPGL- SCNAVLQLLD WQLCYSH-ED WQSLIADACP TA-VNIAVCC WKRELMPGL- SCNAVLQLLD WQLCYSH-GD WQSLIADACS TA-VNIAVCC WKRELMPGL- SCNAVLQLLD WQLCYSH-GD WQSLIADACP TA-VNIAVCC WKRELMPGL- SCNKVLQLLD WQLCYSH-GD WQSLIADACP AA-VNIAVCC WKRELMPGL- GCNCVLELLN WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- GCNCVLELLK WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- GCNCVLELLE WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- GCNCVLELLK WQLCTSH-GD WQSLVADSCV WA-HNVAVCE WKRELVPGL- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- GCNCVLELLA WQLCTSH-GD WQSLVADSCI WA-HNVAVCA WKRELVPGL- GCNCVLELLV WQLCTSH-GD WQSLVADSCV WA-HNVAVCA WKRELVPGL- SCNGVLQLLN WHLCDSH-GD WQSLVADSCC WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SCNRVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCM WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCV WA-HNVAVCG WKRELVPGL- SVNGVLQLLI WQLCDSH-GD 
WQSLVADSCA WA-VNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCA WA-VNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLVADSCA WA-VNLAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLSADSCA WA-HNVAVCG WKRELVPGL- SCNGVLQLLN WQLCDSH-GD WQSLSADSCA WA-HNVAVCG WKRELVPGL- SCNCVLTCLN WVLCDSH-GD WHSLIADACP CA-HNVAVCG WKRELVPGL- AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD AVKNAYQDLQ VPLSKLK-AP WLSMGHCECW EEDINNILSV VKHELVDDVD AVQNAYQDLA VPLSKLK-AP WLSMGHCECW EEDINNILSM VKHELVVDQD ATQNAYQELA IPLSKLK-AP WLSMGHCECW EEGINNILSM VKHELVVDQD --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG --NRNNENLA KVIYFGPDGH -------DEG PMQAKIVTLH W-EMDVSHRG --NRNNENLA KIIYFGPDGH -------DEG NMQAKIVTLH W-EMDVSHRG --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNEFLA KIIYFGPDGH -------DEG PMGQKIVDLH W-EMEASHRE --NHNNENLA KVIYFGPDGH -------DEG PMGRKIVDLH W-EMEASHRD --NCNNEYLA KIIYFGPDGH -------DEG PMGRKIVDLH W-EMEASHRD --NCNNEYLA KIIYFGPDGH -------DEG PMGRKIIQLH W-EMEASHRD --NHNNENLA KIMYFGHDGH -------DEG PMGRKIVTLH V-EMEVSHRE --NHNNENLA KIMYFGHDGH -------DEG PMGRKIVTLH V-EMEVSHRE --NHNNEDLA KIMYFGPDGH -------DEG PMGRKIVNLH V-EMEVSHRE --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE --NHNDENLA KIMYFGPDGH -------DEG PMGEKIVNLH V-EMEASHRE --NRSVENLA KIIYFCPDEH -------DER KMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRH --NRNVENLA KIIYFCPDEH -------DER 
RMWGKIFALE W-EMDISHRM --NRNVENLA KIIYFCPDEH -------DER RMWGKIFALE W-EMDISHRV --NRNVENLA KIIYFCPDEH -------DER KMWGKIFCLE W-EMDISHRH --NRNIEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRV --NRNIEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRV --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNAEDLA RIIYFGPDEH -------DEG KMMCKIFALE W-EMDMSHRM --NRNVEDLA RVIYFGPDEH -------DEG KMMPKIFKLA W-EMDMSHRA --NRNVEDLA RVIYFGPDEH -------DEG KMMSKIFNLE W-EMDMSHRT --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRV --NRNVEDLA RIIYFGPDEH -------DEG KMMFKIFALE W-EMDMSHRV --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRV --NRNVEDLA RIIYFGPDEH -------DEG KMMSKIFALE W-EMDMSHRI --NRNVEDLA RTIYFGPDEH -------DEG KMMSKIYALE W-EMDMSHRV --NRNVEDLA RTIYFGPDEH -------DEG KMMSKIYALE W-EMDMSHRV --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRD --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRD --NRNVEDLA RIIYFGPDEH -------DEG KMMNKIFALE W-EMDMSHRD --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRN --NRNVEDLA RIIYFGPDEH -------DEG KMMRKIFALE W-EMDMSHRN --NRNIEDLA RIIYFGPDEH -------DEG KMMGKIFALE W-EHDMSHRD --NHSCEHLA KSVYFEPDGE -------GEG KVMLKIFGLD WCEVERSHEH --NHSCEHLA KSVYFEPDGE -------GEG KVMLKIFGLD WCEVERSHEH --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHEH --NHSCEHLA KGVYFEPDGE -------DEG KMMLKIFGLE WCEVERSHER --NHSCEHLA KSVYFEPDGE -------DEG KMMLKIFGLD WCEVERSHEH --NHSCEHLA KSVYFEPDGE -------DEG KMMLKIFGLD WCEVERSHEH --NHSCEHLA KSVYFKPDGE -------DEG QMTMKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMSQKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMTMKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMTMKIFNLD 
WCEVEKSHEK --NFSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK --NFSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK --NHSCQHLA KTVYFKPDGE -------DEG QMIVKIFNLD WCEVEKSHEK --NHSCQHLA KTVYFKPDGE -------DEG QMMVKIFNLD WCEVEKSHEK --NHSCQHLA KTVYFKPDGE -------DEG QMMVKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMIVKIFHLD WCEVEKSHEK --NHSCEHLA KSVYFKPDGE -------DEG QMFMKIFNLD WCEVEKSHEK --NHSVEHLA KSVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK --NHSCEHLA KSVYFKPDGE -------DEG QMIMKIFNLD WCEVEKSHEK --NHSCEHLA KTVYFKPDGE -------DEG QMIMKIFGLD WCEVEKSHEE --NHSCERLA KHIYFQPDGE -------DEG KMVLKIFGLD WCEMEKSHQR --NHSCDRLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR --NHSCERLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR --NHSCERLA KHIYFQPDGE -------DEG KMILKIFGLD WCEMEKSHQR --NHSCEHLA KHIYFQPDGE -------DEG KMILKIFGLD WCEVERSHQR --NHSCEHLA KHIYFQPDGE -------DEG KMILKIFGLD WCEVERSHQR --NHSCEHLA KHIYFQPDGE -------DEG KMIIKIFGLD WCEVEKSHQR --NHSCERLA KHIYFQPDGE -------DEG KMILKIFELN WCEVEKSHQH --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG QMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVLKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVHKIFELD WVEMEKSHQQ --NHSCEMLA KTVYFEPDGE -------DEG KMVHKIFELD WVEMEKSHQQ --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEIEKSHQQ --NHACEMLA KSVYFEPDGE -------DEG KMVLKIFGLD WCEIEKSHQQ --NHACEMLA KSVYFEPDGE -------DEG KMNLKIFGLD WCEIEKSHQQ --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEMEKAHQQ --NHACEMLA KTVYFEPDGE -------DEG KMVIKIFGLD WCEMEKAHQQ --NHSNEHMA KTIYFEPDGH -------DEG KMILNIFGLD WCEVETSHQE RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKH---- RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKP---- RMNRLTEVAA KMAYFGPDGF HWDVELWEEN DLNCDDFELG W-NLKP---- 
MVNRSPEVAA KMAYFGPDGF HWDVELCEES DLTVDDFELG W-VLKP---- MVNRSPEVAA KMAYFGPDGF HWDIELCEEN DLTCDDFELG W-VLKP---- -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL -SSVDDNVV- VVMLAFAVSF CHPWGHYIQG LGD-QHKLAR PNT---AQKL -SSVDDNVV- VVMLAFQASF CDPWGHYIQG LGD-QYKLAR PNT---ARQL -SSVRDNMI- VIMLPFNINS IDPWRHYILG LND-QIQLTR PNT---ARRL -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ARRL -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ARRL -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL -SSVRDNMI- VIMLPFSINS IDPWRHYILG LND-QIQLTR PNT---ACRL -SSVRDNML- VIMLPFLVNN LDPWRHYILG LND-QIQLTR PNT---ARRL -SSVRDNYI- VIMLPFKINS RDPWRHYILG IND-QVCLNR PET---ARRL -SSVRDNII- VIMLPFKINS RDPWRHYILG LND-QVCLTR PET---ARRL -SSVKDNMIR VIMLPFSNSS VDPWRHVILG LND-QIKLTR PNT---AHRL -SSVKDNMIR VIMLPFPSSS VDPWRHVILG LND-QIKLTR PNT---AHRL -SSVKDNMIR VVMLPFDNNS WDPWRHVILG LND-QIKLTR PNT---AHRL -SSVKDNMIR VIMLPFSNNS WDPWRHVILG LND-KIKLTR PNT---AHRL -SSVKDNMIR VIMLPFSNNS WDPWRHVILG LND-QIKLTR PNT---AHRL -SSVKDNMIR VIMLPFGENS WDPWRHVILG LND-QIKLTR PNT---AHRL -SSVDDNHC- VEMLPFMCQR VDPWGHYVQI LAD-RQDLAR PVT---LQAL -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL -SSVDDNQC- VELLPYNCQR MDPWGHYVQI LAD-RLDLTR PVT---LQGL -SSVDDNQC- VELLPYVCQR MDPWGHYVQV LAD-RLDLTR PVT---LQGL -SSVDDNQC- VELLPYVCQR MDPWGHYVQI LAD-RLVLTR PVT---LHGL -SSVDDNQC- VELLPYLCQR MDPWGHYVQI LAD-RLNLTR PVT---LQGL -SSVDDNVC- VEMLPFVCQK MDPWGHYVQI LAD-RLDLTR PVT---LQGL -SSVDDNYI- IEMLPFVCDR VGPWGHYAQV LAD-QLHLTR PHT---LRDL -SSVDDNYI- IEMLPFVCDR VGPWGHYAQV LAD-QLHLTR PHT---LRDL -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYI- IEMLPFVCDR VGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYI- IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYI- 
IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYI- IEMLPFVCDR IGPWGHYAQI LAD-QLNLTR PHT---LREL -SSVDDNYV- IEMLPFVCHR VGPWGHYAQV QAD-QQNLTR PHT---LADL -SSVDDNYV- IDMLPFICHR VGPWGHYAQV LAD-QINLTR PHT---LRDL -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL -SSVDDNFV- IEMLPFVCHR IGPWGHYAQM LAD-QIVLTR PHT---LRDL -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LSD-QIVLTR PHT---LRDL -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LAD-QIVLTR PHT---LRDL -SSVDYNYV- IEMLPFVYHR VGPWGHYAQM LAD-QIVLTR PHT---LRDL -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDL -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-KLVLTR PHT---LRDL SSSVDDNYI- IEMLPFVCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDI -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRNI -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRNI -SSVDDNYV- IEMLPFMCHR VGPWGHYAQQ LAD-RLVLTR PHT---LRDI -SSVDDNYN- VNMLPFQNSR KDPVGHYVQD LED-ARRLIR PGT---ARSL -SSVDDNYN- VNMLPFQNSR KDPVGHYVQD LED-ARRLIR PGT---ARSL -SSIDDNYT- VNMLEFPNSR KDPVGHYVQN LED-AIRLIR PGT---ARSL -SSIDDNYT- VNMLEFPNSR KDPVGHYVQN LED-AIRLIR PGT---ARSL -SSIDDNYT- VNMLPFPNSR KDPVGHYVQD LED-AIRLIR PGT---ARSL -SSIDDNYT- VNMLPFPNSR KDPVGHYVQD LED-AIRLIR PGT---ARSL -SSVDDNYN- VNMLPFPNSR QDPVGHYVQD LED-LLRLIR PGT---ARSL -SSVDDNYN- VNMLPFPNSR KDPVGHYVQD LED-LLRLIR PGT---ARSL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LEDSALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQE LED-ALRLIR PGT---ARAL -SSVDDNYD- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYD- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFMQSH GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYH- VNMLPFAQSN RDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYH- VNMLPFAQSN GDPVGHYVQG LED-ALRLIR PGT---ARAL -SSVDDNYN- VNMLPFVQSN GDPVGHYVQG LED-ALLLIR PGT---ARAL -SSVDDNYN- VNMLPFVQSN 
GDPVGHYVQG LED-ALLLIR PGT---ARAL -SSVDDNYD- VNMLPFVQSN GDPVGHYVQG LED-ALLLIR PGT---ARAL -SSVDDNYN- VNMLPFVSSN GDPVGHYVQG LED-ALRLIR PGT---ARAL -SGVDDNYQ- VNMLPFNHSK NNPVGHYVQG LED-ALNLIR PGT---ARAL -SGVDDNVQ- VNMLPFNHSK HNPVGHYVQG LED-ELNLIR PGT---ARAL -SGVDDNYQ- VNMLPFNHSK HNPVGHYVQG LED-ALNLIR PGT---ARAL -SGVDDNYQ- VNMLPFKHSK HNPVGHYVQG LED-ALNLIR PGT---ARAL -SGVDDNYK- VNMLPFSHSK HNPVGHYVQG LGD-ALRLIR PGT---ARAL -SGVDDNYK- VNMLPFSHSK HNPVGHYVQG LGD-ALRLIR PGT---ARAL -SGVDDNYK- VNMLPFSHSN HNPVGHYVQG LGD-ALRLIR PGT---ARAL -SGVDDNYQ- VNMLPFDHCK HNPVGHYVQG LED-ALGLIR PGT---ARAL -SSVDDNYL- VNMLPFLHSR QNPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFMQSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFIHSR DNPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFIHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFIRSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFMHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFFHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFFHSR ENPVGHYVQG LED-PMHLIR PGT---ARKL -SSVDDNYL- VNMLPFTHSR ETPVGHYVQG LED-PMHLMR PGT---ARAL -SSVDDNYL- VNMLPFAHSR QTPVGHYVQG LED-PMHLMR PGT---ARAL -SSVDDNYL- VNMLPFTHSR ETPVGHYVQD LED-PMHLMR PGT---ARAL -SSVDDNYL- VNMLPFTHSR ENPVGHYVQG LED-PMHLIR PGT---ARAL -SSVDDNYL- VNMLPFTHSR ENPVGHYVQG LED-PMHLIR PGT---ARAL -SSSDDNHL- VNMLPFGVSR DDPVGHYMLG LED-AIRLYR PGT---ARAL ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI ---EDDHPL- LCIGSFSVHK YVSVMVYPLP MND-CVRMSQ PCHAAHAQDI ---EDDHPL- ICIGSFSVHK QFSVMVYPLP MND-AIRMSQ PCHAAHAQDI ---EDDHPL- ICIGSFSVHK QFSVMVYPLP MND-AIRMSQ PCYAAHAQDI SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK SILT---VFH FSGGFRDKPM -ERSQLHS-T YSED----KK DQKVLIHAMK SILV---VMH FSGGFRDKPI -ERSQLHC-T YSEN----KK DQKVVIRAMK SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK SILH---VMH YSGAFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK SILH---VMH YSGAFHDKSA -ERSQLHS-R 
YSEH----KN DHKCVILASK SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK SILH---VMH YSGEFHDKSA -ERSQLHS-R YSEH----KN DHKCVILASK SILM---VMH YSGEFHDKSP -ERSQLHFDS YSEH----KN DHKCVILASK SILA---VMH YSGEFHDKSP -ERSHLHF-R YSEM----KN DVKCVI---K SILV---VMH YSGEFHDKSP -ERSHLHY-R YSEV----KN DVKCVILASK SILV---VTH YSGGFHDKSA -ERSQHHF-I YNEV----KD DHKCVILASK SILV---VTH YSGGFHDKSA -ERSQHHF-I YNEV----KD DHKCVILASK SILV---VMH YSGGFHDKSS -ERSQHHF-I YNEV----KN DHKCVILASK SILV---VMH YSGGFHDKSA -ERSQHHF-I YNEI----KN DHKCVILASK SILV---VMH YSGGFHDKSA -ERSQHHF-I YNEI----KN DHKCVILASK SILV---VMH YSGGFHDNSA -ERSQHHF-I YNEV----KN DHKCVILASK SILP---CPH ASGKEQDGAV -ERSQHYV-V YAEL----QV DHKCTIAAHK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DHKSTIDANK SILP---CPH ASGKEQDGAE -ERSQHYG-V YQEL----QN DAKSTIDANK SILP---CPH ASGKEQDGAV -ERSQHYG-V YQEL----QN DAKSTIDANK GILV---CPH ASGKEQDGAM -ERSQHYV-V YAEL----QN DHKNTINANK YILAC--CPV ASGKEQDGNK -ERSQHYT-I YAEI----QN DHKNPISANN YILAC--CPV ASGKEQDGNK -ERSQHYT-I YAEI----QN DHKNPISANN YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH YILA---CPV ASGKEQDGAK -ERSQCYT-I YAEL----QN DHKSLISANH YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH YILA---CPV ASGKEQDGAK -ERSQCYT-I YAEL----QN DHKSLIAANH YILA---CPV ASGKEQDGAQ -ERSQCYT-I YAEL----QN DHKSLISANH FILS---CPV ASGKEQDGAT -ERSQYYV-V YAEL----QN DHKSPISANK FILA---CPV ASGKEQDGAT -ERSQHYV-V YAEL----QN DHKSPISANK FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK FILS---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK FILA---CRV ASGGEVDGNT -ERSQHYI-V YAEL----QN DHKSPISANK FILA---CPV ASGGEQDGNA -ERSQHYI-V YAEL----VN 
DHKSPISANK FILA---CPV ASGGEQDGNA -ERSQHYI-V YAEL----VN DHKSPISANK FILA---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPISANK FILA---CYV ASGGEQDGNT -ERSQHYI-V YAEL----QN DHKSPILANK FILA---CPV ASGGEQDGNT -ERSQHYI-V YAEL----QD DHKSPISANK FILA---CPV ASGGEQDENT -ERSQHYI-V YAEL----QN DHKSPIHANK FILA---CPV ASGGEQDENT -ERSQHYI-V YAEL----QN DHKSPINANK FILA---CPV ASGGEQDAVT -ERSQHYI-V YAEL----QN DHKSPISASK TILF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEH----KQ DHKSAIIANK TILF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEH----KQ DHKSAIIANK TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK TILL-YGCQY YSGEFQDCEV -ERSQCYN-V YCEL----KQ DHKSAIIANK TILL-YGCQY YSGEFQDCEV -ERSQCYN-I YCEL----KQ DHKSAIIANK TVLF-YGCQY YSGQFQDCEI -ERSQLYN-V YCEL----KQ DHKSAIMANK TVLF-YGCQY YSGEFQDCEI -ERSQLYN-V YCEL----KQ DHKSAIMANK VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DYKSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DYKSAIIANQ VILF-YGCQY YSGKFQDSEM FERSQLYR-V YCEI----KK DYKSAIIANQ VILF-YGCQY YSGKFQDSDM FERSQLYR-V YCEI----KK DYKSAIIANQ IILF-YGCQY YSGKFQDSEL FERSQLYR-M YCEI----KK DHKSAIIANQ IILF-YGCQY YSGKFQDSEL FERSQLYR-M YCEI----KK DHKSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHNSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ VILS-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIVANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIMANQ VILF-YGCQY YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSTIIANQ VILF-YGCQF YSGKFQDSEL FERSQLYR-V YCEI----KK DHKSAIIANQ NILF-YGCEV YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIIAHK NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMACK NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMAYK NILF-YGCEY YSGEFQDSES -ERSWIYN-V YCEI----KK DHKSAIMAYK NILF-YGCQY CSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAILAHK NILF-YGCQY CSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAILAHK 
NILF-YGCEY YSGEFQDSEE -ERSWIYN-V YCEI----KK DHKSAIMAHK NILF-YGCQY YSGEFQDSEA -ERSWIYN-V YCEI----KK DHKSGIMAHK NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVIAHK NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK NILF-YGCEY YSGQFPDGEA -ERSWIYP-V YCEM----KK DHKSAVVAHK NILF-YGCEY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK NILF-YGCEY YSGQFPDAEA -ERSWIYD-V YCEM----KK DHKSAVYAHK NILF-YGCEY YSGQFPDCEA -ERSWIYD-V YCEM----KK DHKSAVVAHK NILF-YGCQY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK NILF-YGCQY YSGQFPDGEA -ERSWIYD-V YCEM----KK DHKSAVVAHK NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKNAIVAVK NILF-YGCEY YSGHFPDGEA -ERSWIYD-M YCEL----KK DHKSAIVAVK NILF-YGCEY YSGHFPDGEP -ERSWIYD-M YCEL----KK DHKSAIVAVK NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKSAIVAVK NILF-YGCEY YSGQFPDGEP -ERSWIYD-V YCEL----KK DHKSAIVAVK NILF---VNY YSGDFQDPEL -ERSQLYN-V YCEQ----KQ DHRNAIRASK PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD DLKDPISTEV PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD DLKDPISTEV PTEQ---TRY QIHTFLDDSI -RRDLCNQ-G CHEENMVWRD DLKDPISTEV HTDQ---DRY DVRTFLGHSI -KCHMCNQ-A CHEENIVWRD DLKDPITTEV HTDQ---DRY DIRTFLEHSI -RCHLCNQ-A CHEENIVWRD DLKDPITTEV -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV -RNKGLDWHA GND-MKGGPK -IIYLGMLFY -------AEN NVAKQKAHFV -RNKGCDWHA GKD-MAGGPK -IIYVGMLFY -------AES NWAKQKAHFV -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -VNKDHDWIT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -VNKDHDWIT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -HNKDHDWAT GKDEMKGGAK -IMNPGALFY -------AEQ NWVRPKNHFV -HNKGLDWAT GKDDMKGGAK -IMMSGALFY -------AEQ NWIRARNHFV -SNKGLDWTA GKDGMKGEAK -AMMNGALFY -------AEQ NWFRKKNHFV -SNKGLDWTA GKDGMKGENK -AMMNGTLFY -------AEQ NWFRQKNHFV -CNKGLDWAA GKD-MKGGAK -VMMNGALFY -------AER NWIRQKHHFV -CNKGLDWAA GKD-MKGGAK -IMMNGALFY -------AER NWIRQKHHFV -SNKGLDWAA 
GKD-MKGGAK -MMANGALFY -------EER NWIRQKNHFV -ANKPLDWAA GKD-MKGGAK -MMANGALFY -------AER NWIRQKNHFV -ANKGLDWAA GKD-MKGGAK -MMVNGALFY -------AER NWIRQKNHFV -ANKGLDWAA GKD-MKGGAK -MMVNGALFY -------AER NWIRQKNHFV YQDKMKDWDT GKN-MDEGAK -MYVEIHLFY -------AEN NIRRQNDHSV NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLIKQNEHSV NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NLRKQNEHSV NDNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKQNEHSV NPNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKLNEHSV NPNKMLDWDT GKN-MDKGAK -MFYKIVLFY -------AEN NIRKQNEHSV VPRKMLDWDT GKN-MDKGAK GMYHEIVLFF -------AEN NVKKQVEHSV QPNKMLDWDT GKT-MDRGAA -MCGEIKLFY -------AEP NVWNQNDHSV QPNKMLDWDT GKT-MDRGAA -MCGEIKLFY -------AEP NVWNQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV VPNKMLDWDT GKT-MDRGAA -MCAEIKLFY -------AEP NVWKQNDHSV HPDKMLDWDT GKS-MDRGAA -MCREIKLFY -------AEP NVWKQNDHSV HPNKLLDWDT GKT-MDRGAA -MCHEIKLFH -------AEP NVWRQNDHSV HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV HLNKMLDWDT GKT-MDREAT -MCREMKLFY -------AET NLWKQNDHSV VLNKMLDWDT GKT-MDREAA -MCREIKLFY -------AET NVWKQNDHSV VLNKMLDWDT GKT-MDREAA -MCRDIKLFY -------AET NVWKQNDHSV VLNKMLDWDT GKT-MDREAA -MCRDIKLFY -------AET NVWKQNDHSV HMNKMLDWDT GKT-MDFEAA -MCREIKLFY -------AET NVWKINDHSV VVNKMLDWDM GKT-MDFEAA -MCREIKLFY -------AET NVWKQNNHSV HMRKMLDWDT GKT-MDREAA -VCREIKLFY -------AET NCWKQNDHSV VMNKMLDWDT GKT-MDREAA -MCREIKLFV -------AET NVWKQNDHSV VMNKMLDWDT GKT-MDREAA -MCREIKLFV -------AET NVWKQNDHSV HMNKMLDWDT GKT-MDREAA -MCREIKLFY -------AET NVWKQNDHSV QEQKGMDWNT GKE-MEQGPK -IILHGSLFF -------AEP NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK 
-VILHGTLFF -------AES NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGCLFF -------AEP NIVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGVLFF -------AEP NVVRQPGVSH QEQKGMDWNT GKE-MEQGPK -IILHGVLFF -------AEP NVVRQPGVSH QEHKGMEWDT GKE-MQQGPK -VVVHLALFY APSNVLAAEP NIIGQPQVSH QDHKGMEWDT GKE-MQQGPK -VVVHLALFY MPSNVLAAEP NIIGQPQVSH QEHKGMEWDT GKE-MQQGPK -IVVHLGLFY MPSNVLAAEP NIIGQPQVSH QEHKGMEWDT GKE-MQQGPQ -IEVHLALFY MPSNVLPAEP NIIGQPQVSH QEHKGMEWDT GKE-MQQGPK -VVLHHALFY APSNVLAAEP NIIGQPQVSH QEHKGMEWDT GKE-MQQGPK -VVLHHALFY APSNVLAAEP NIIGQPQVSH QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV QEHKGMDWDT GKE-MQQGPK -IVVVLALFY QPSNVLAAEP NIMGQPQVSV HEVKGMDWNT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIVGQPQVSH QEVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIVGQPQVSH QGVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIIGQPQVSH QGVKGMDWDT GKE-MQQGPK -IVMHLALFY APSNVLAAEP NIIGQPQVSH QGVKGMDWDT GKE-MQQGPK -IVVHLTLFY APSNVLAAEP NIFGQPQVSH QGVKGMDWDT GKE-MQQGPK -IVVHLALFY APSNVLAAEP NIIGQPQVSH YEHKGMDWDT GKE-AQQGPE -VVHHNLLFF -------AEP NIHGQPGVGH YRHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIQGQPGVGH FEHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIQGQPGVGH YEHKGMDWDT GKE-AEQGPE -VVHHNLLFF -------AEP NIHGQPGVGH YKHKGMDWDT GKE-MEQGPK -VVNYNLLFY -------AEP NIHGQPRVGH YKHKGMDWDT GKE-MEQGPK -VVNYNLLFY -------AEP NIHGQPRVGH YEHKGMDWDT GKE-MEQGPK -VVHHNILFY -------AEP NIHGQPRVGH YDHKGVDWDT GKE-MEQGPK -VLHHNLLFY -------AEP NIHGQPTVGH HEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH AEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH VEHKGMDWDT GKE-MNQGPK -IVLHGVLFN -------AEP NIHGQPGVSH HEHKGMDWDT GKE-MNQGPK -VVMHGVLFN -------AEP NIHGQPGVSH HQHKGMDWDT GKE-MNQGPK -VVLHGVLFN 
-------AEP NIHGQPGVSH HQHKGMDWDT GKE-MNQGPK -VVLHGVLFN -------AEP NIHGQPGVSH HEHKGMDWDT GKE-MNHGPK -VIVHGVLFH -------AEP NIGGQPGVSH HCHKGMDWDT GKE-MNHGPK -VIIHGLLFH -------AEP NIGGQPGVSH HCHKGMDWDT GKE-MNHGPK -VIIHGVLFH -------AEP NIGGQPGVSH HEHKGMDWDT GKE-MNQGPK -VIIHGVLFH -------AEV NIHGQPGVSH HEHKGMDWDT GKE-MNQGPK -VIIHGILFH -------AEV NIHGQPGVSH NDHKGMAWDT AKD-MEQGPK -MVEHQVLFY -------AEP NMHGQPEVIH IADKLHEWPT NVN-KENSAD ----HRQLFH -------ANS SALDKHQHNV IADKLHEWPT NQN-KENSAD ----HRQLFH -------ANS SALDKHQHNV IADKLHEWPT NQN-KENSAD ----HRQLFH -------ANS SALDKHQHNV IAEKLQEWPT NQN-KENVAD ----HRQLFH -------ANS SALDKHRHNV IADKLQEWPT NQN-AENVAD ----HRQLFH -------ANS SCLDRHRHNV VFL-ISDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VFL-ISDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VFL-LSDATR LLIVHEGCMI DYTFMEFACH PFFAELFMEH MVARYQYYSN VFL-ISDATR LLIHHVGCMI EYTFMEFACH PFFAELFMEH MVIRYQYYNN IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYINN IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYVNN IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFSELFMEH VYIRYQYVNN IFL-ICDAVR ILILHHGCMT DHTFMEFACN PFFAELFMEH VYIRYQYVNN IFL-ICDAVR ILILHHGCMT DRTFMEFACN PFFAELFMEH VYIRYQYVNN IFL-ICDAVR ILILHHGCMT DRTFMEFACN PFFAELFMEH VYIRYQYVNN IFL-ICDAVR ILIVLHGCMT AWTFMEFACN PFFAELFMEH VYIRYQYVNN IFV-FCDAVR ILIMHSGVMI GYTFMEFACN PFFAELFMEH VMVRYQYESN IFV-ICDAVR ILIVHSGVMT GFTFMEFACN PFFAELFMEH VYIRYQYQAN IFV-VCDAVR ILIVHYGCMI GYTFMDFACN PFFAELFMDH VFIRYVYMNN IFV-VCDAVR ILIVHYRCMI GYTFMDFACN PFFAELFMDH VFIRYQYMNN IFV-VCDAVR ILIVHYGCMV PYTFMAFACN PFFAELFMEH VFIRYQYVNN IFI-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VFIRYQYIPN IFV-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VFIRYQYIPN IFV-ICDAVR ILIVHYGCMV GDTFMDFACN PFFAELFMEH VVIRYQYIPN IFW-ISDAKK RLICGMSSMC LVTFMTLACN PLFAKLFMEV VPMRYDYLTN TFV-ISDAQK RLICGVSSMV MRTFMTLACN PFFAKLFMEV VPMSYDYVPN TFV-ISDAQK RLICGVSSMA MRTFMTLACN PFFAKLFMEV VPMSYDYVPN TFV-ISDAQK RLICGVSSMV MRTFMTLACN PFFAKLFMEV VPMSYDYVPN TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV 
VPMSYDYVPN TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN TFV-ISDAQK RLICGVSSMC MRTFMTLACN PFFAKLFMEV VPMSYDYVPN IFI-IADAQK RLICGVSSMC LQTFMNLACN PFFAKLFMEV IPMRYDYQTN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFSKLFMEV VCMRYNYTSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFSKLFMEV VCMRYNYTSN MFI-ISDAQN KLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN MFI-ISDAQN RLIVANSIMV GQTFMAMACN PHFAKLFMEV VVMRYDYCSN VFI-ISDAVN RLICANSIMC GLTFMAMACN PHFAKLFMEV VCMRYDYFSN MFI-ISDASN RLICANSIMC GLTFMAMACN PHFAKLFMEV VCMRYEYFSN MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV VCVHYDYGAN MFI-ISDAQN RLICGNSIMC GITFMAMACN PHFAKLFMEV ACMRYDYFAN MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV ACMRYDYFAN MFI-ISDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEV ACMRYDYFAN MFI-VSDAQN RLICGNSIMC GLTFMAMACN PHFAKLFMEG VCMRYDYSAN MFI-VSDAHN RLICGNSIMC ALTFMAMACN PHFAKLFMEA VCMRYDYSAN MFI-VSDAQN RLICGSSIMC RLTFMAMAVN PHFAKLFMED VCMRYDYSPN MFIVISDAQN RLICGSSIMC RLTFMAMAVN PHFAKLFMEE SCVRYDYSAN MFIVISDAQN RLICGSSIMC GLTFMAMAVN PHFAKLFMEE SCVRYDYSRN MFI-ISDAQN RLICGSSIMC GLTFMATACN PHFAKLFMEE VCMRYDYAAN IFI-GNDARR VLICGKSMMP GHRFMREACV PFFHKLFMAV NQMRYDYMMN IFI-GNDARR VLICGLSMMP GHRFMREACV PFFHKLFMAV NQMRYDYMVN IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYVTN IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYVTN IFT-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAA NQMRYDYVTN IFI-ENDARR MLICGNSMMP RHRFMREACV PFFHKLFMAV NQMRYDYMTN IFV-GNDARR MLICGLSMMP GHRFMKEACV PFFHKLFMAV RRMRYDYMSN IFV-GNDARR MLICGLSMMP GHRFMKEACV PFFHKLFMAV RRMRYDYMTN VFV-GNDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYMWN WFV-GNDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAD NQVRYDYMWN 
VFV-ANDARR MLIVGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN VFV-GNDARR MLIVGVSLMP AYKFMREACV PFFRKLFMAE NQVRYDYMWN VFV-GNDARR MLITGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN VFV-GDDARR MLITGVSMMP AYKFMREACV PFFRKLFMAE NQVRYDYVWN VFV-GNDGRR MLIVGCSMMP DYKFMREACV PFFRKLFMAE VQDRYDYAWN VFV-GNDARR MLIVGCSMMP DYKFMREACV PFFRKLFMAE VQDRYDYNWN VFV-GNDARR MLIVGCSMMP DYKFMREACV PFFRKLFMAE VQDRYDYNWN VFV-GNDARR MLIVGVSVMP SYKFMREACV PFFHKLFMAD NQVRHEYMWN VFV-GNDARR MLIVGVSVMP AYKFMREACV PFFHKLFMAE NQVRHDYMWN VFV-DNDARR MLIVGQSVMP PYKFMREACV PFFHKLFMAE NQVRHDYVWN VFV-DNDARR MLIVGQSVMP AYKFMREACV PFFHKLFMAE NQVRHDYVWN VFV-DNDARR MLIVGQSVMP AYKFMREACV PFFHKLFMAE NQVRHDYVWN VFV-DNDIRR MLIVGVSVMP AYKFMREACV PFFHKLFMAE NQVRHDYVWN IFV-GNDARR KLIAGVSFMS MVKFMCEACV PFFRKLFMAV GQMRYDYVNN IFV-GNDARR KLIAGLSFMA MMKFMCEACV PFFRKLFMAV GHMRYDYVSN IFV-GNDARR KLIAGVSFMA VMKFMCEACV PFFRKLFMAV GQMRYDYMNN IFV-GNDARR HLIAGVSFMA VVKFMCEACV PFFRKLFMAV GQMRYDYVNN IFV-GNDAHR KLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN IFV-GNDAHR KLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN IFV-GNDAVR NLIAGVSFMS IMKFMCEACV PFFRKLFMAV RQMRYMYMNN IFV-GNDARR KLIAGVSFMS VVKFMCEACV PFFRKLFMAF RQMRYDYVNN VFM-GNDATR KLITGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYNVN VFM-GNDATR KLITGVSVMP TYKFMCGACV QFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLITGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLITGFSIMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLISGVSVMP TYKFMCGACV PFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLITGVSVMP TYKFMCNACV PFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLIPGVSVMH DYKFMCGACV PFFHKLFMAV RNMRYDYTVN VFM-GNDATR KLIPGVSVMH DYKFMCGACV PFFHKLFMAV RNMRYDYTVN VFV-GNDATR KLIAGVSVMP IVKFMCGACV PFFRKLFMAL RNMRYDYASN VFV-GNDATR KLIAGASVMF IVKFMCGACV PFFRKLFMAL RNMRYDYSQN VFV-GNDATR KLIAGVSVMP IVKFMCGACV PFFRKLFMAL RNMRYDYSQN VFV-GNDATR KLIAGVSVMP FVKFMCGACV PFFRKLFMAL RNMRYDYTSN VFV-GNDATR KLIAGVSVMP FVKFMCGACV PFFRKLFMAL RNMRYDYTSN IFI-GNDAMC MLIPGMSVMV HHKFMRAACM PFFNKLFMAV RYMRYDYVDN VMN-GSPCIR ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA VMN-GSPCIR 
ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA VMN-GSPCIR ALIIGGSSVG VNHFMMGPCQ EFFTDLFMMY EGLQYACVCA MMN-GSACIR TLIIGGSIVG VNVFMMEPCQ EFFTDLFMVY EGLQYAVVCA MMN-GSSCIR TLIIGGSIVG VNVFMMGPCQ EFFTDLFMVY EGLQYAVVCA VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC VDIKIFDTCF RSAVYAVDNS WETLCDY-EM LSGYFGAEIN HNPRLPEQVC VDIKIFDTCF RGAVYAEDNS WETLCGY-EM LSGYFGAEIN HNPRLPEQVC HDIKIKDTVF RLAVYAWDNF WETLCKC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTVF RLAVYAWENF WETLCNC-EM LSGHVGAKIN HNPRLFEQVC HDIKIKDTEF RLAVYAWENS WETLCNC-EM LSGHIGAKIN HNPRLFEQVC HDIKIKDTCF RLAVYAWENS WETLCNV-EM LSGHFGAKIN HVPRLPEQVC HDIKIKDTCF RLAVYAWENS WETLCNV-EM LSGHFGAKIN HVPRLPEQVC HDIKIMDTCF RVAVYTWENS WETCFNR-EM LSGTFGAKIS HNPRLPEQVI HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGAKIS HNPRLPEQVI HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGAKIS HNPRLPEQVI HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI HDIKIMDTCF RVAVYTWENS WETCCNR-EM LSGTFGARIS HVPRLPEQVI HDIKI----- -MEVYAPENS WETIKNV-EM VSGKEGAEIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPDNS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -IEVYAPENS WETTKVV-EM VSGKSGAQIN HTPQLPEQVI HDIKI----- -KEVYAPENS WETIKNV-EM VSGKAGKQIN HEPQLPEQVI HSIKI----- -FEVYADENS WENVRNF-EM VSGRAGAHIN HIPQLPEQAY HSIKI----- -FEVYADENS WENVRNF-EM VSGRAGAHIN HIPQLPEQAY HSIKI----- -FEVYADENS WENIRNF-EM VSGRAGAYIN HIPQLPEQAV HSIKI----- -FEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC HSIKI----- -FEVYADENS 
WENIRNF-EM VSGRAGAHIN HIPQLPEQAC HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC HSIKI----- -LEVYADENS WENIRNF-EM VSGRAGAHIN HIPQLPEQAC HSIKI----- -FEVYAEDDS WENICNF-EM MSGRTGAQIN HIPQLPEQVC HSIKI----- -FEVYAEENS WENVCNF-EM MSGRAGAQIN HIPQLPEQVC HDIKI----- -FEVYAAENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC HDIKI----- -FEVYAAENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC HDIKI----- -FEVYAVENS WENARNF-EM MSGRAGAEIN H-PQLPEQVC PDIKI----- -FEVYAAENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC PDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC PDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -VEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -FEVYADENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -FEVYANENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -FEVYANENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVC HDIKI----- -SEVYAEENS WENVRNF-EM MSGRAGAQIN H-PQLPEQVV YDIKIYETHW RMGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPEQVI YDIKIYETHW RMGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPEQVI YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEHVI YDIKIYETHW RVGVYAIDNS WETLNVS-EM TSGRMGAKIN HLPRLPEQVI YDIKIYETHW RPGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPETVI YDIKIYETHW RPGVYALDNS WETLNVS-EM TSGRIGAKIN HLPRLPETVI YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT YDIKIYETHY RMGVYAVDNS WETLVYC-EM TSGRIGAKVN HLPRLPEQVT YDIKIYETHY RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVT YNIKIYETHF RDGVYAVDNS WETLVYV-EM ASGRIGAKIN HQPRLPEQVI YNIKIYETHF RVGVYAVDNS WETLVYV-EM ASGRIGAKIN HMPRLPEQVI YNIKIYETHF RVGVYAVDNS WETLVYV-EM ASGRIGAKIN HMPRLPEQVI YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGANIN HLPRLPEQVV YDIKIYETHF RMGVYAVDNS WETLVYC-EM 
TSGRIGAKIN HLPRLPEQVV YDIKIYETHF RMGVYAIDNS WETLVYC-EM TSGRIGAKIN HQPRLPEQVV YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HQPRLPEQVV YDIKIYETHF RMGVYAVDNS WEALVYC-EM TSGRTGAKIN HQPRLPEQVV YDIKIYETHF RMGVYAVDNS WETLVYC-EM TSGRIGAKIN HLPRLPEQVF YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC YDIKIWETFF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC YDIKIWETVF RGGVYAIENS WETLVLC-EM TSGRCGAKMN HLPRLFEQVC YDIKIWETAF RGGVYAIENS WETLVLC-EM TSGRSGAKMN HLPRLFEQVC YDIKIWETAF RGGVYAIENS WETLVLC-EM TSGRSGAKMN HLPRLFEQVC YDIKIWETAF RGGVYAMENS WETLVLC-EM TSGRVGARMN HLPRLFEQVC YDIKIWETVF RGGVYAIENS WETLILC-EM TSGRCGAKMN HMPRLFEQVC YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIWETHI RQGVYAVENS WETLITC-EM TSGRIGAKIN HLPRLPEQVV YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDVKIWETHI RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIWETHM RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIWETHL RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIWETHL RHGVYAVENS WETLVTC-EM TSGRIGAKIN HLPRLPEQVI YDIKIVETHL REGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI YDIKIVETHL REGVYAIENS WETLVVC-EM TSGRMGAKIN HLPRLPEQVI YDIKIVETHL REGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI YDIKIIETHL RNGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI YDIKIMETHL RNGVYAVENS WETLVVC-EM TSGRIGAKIN HLPRLPEQVI YDIKIDETRY RDGVYARRNS WETANVC-EM SSGRSGAKIN HNPRLPEQVT MNITIQCLHT GEGVMCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL MNITIQCLHT GEGVVCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL MNITIQCLHT GEGVVCNVKC KEFLQREDEM KAGLIGIICN HLSRMIMVIL VNIIIQCLHT NEGVVCNVKC KEFLQREEDM KSGLIGIICN HISRMCMHIL INIIIQVLHT NEGVVCNVKC KEFLQREDDM KSGLIGIICN HISRMCMHIL -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIAIRVSGS E-SENVVLQY -PPCTITVSG LKGKLHNRWP VLTQITRTMR IQIPIRISGS E-SENVVLQY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS 
E-SENTVVMY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS E-SENTVVMY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IRIKIREPGS E-SENTVVMY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY -PPVELTVSG LKGKV-NGWP YLTDITRTPL IGIKIREPGS Q-SENTVVMY -PPCELTVSG LKGKH-VGWP YLTDITRTPL IRIQIREPGS E-CENSVVMY -PPCDLTVSG LKGKQ-NGLP YLTDITRTPL IRIQIRVPGS E-SENVVVMY -PPCDLTVSG LKGKQ-NGWP YLTDITRTPL IRIQIRVPGS E-SENITVMY -PPCELTVSG LKGKH-NGWP YLTDITRTPL IKIHIRVPGS Q-SENHVVMY -PPCELTVSG LKGKH-NGWP YLTEITRTPL IKIHIRVPGS Q-SENHVVMY -PPCDLTVSG LKGKH-NGWP YLTDITRTPL IKIHIRIPGS Q-SENHVVMY -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY -PPCELTVSG LKGKH-NGWP YLTDITRTPL IRIHIRVPGS E-SENHVVMY -PPCNLTVSN LKGMI-PPCP ALTEITRTV- -FIV--MYYA I-LRNKIMQY -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCCLTVSN LKGMI-PPCP ALTEITRTP- -FIM--MYVA M-LRNDIMQY -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCCLTVSN LKGMI-PPVP ALTEITRTP- -FIV--MYVA I-LRNDIMQY -PPCSLTVSN LKGMM-PPCP AMTEITRTV- -III--MYYA I-LRNEIMQY -PPCKLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYGS I-LRNDVMQY -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYGS I-LRNDVMQY -PPCHLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY -PPCVLTVSN LKGVA-ASCP AITEITRTA- -VIV--MYVS I-LRNDVMQY -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY -PPCVLTVSN LKGVA-ASCP AMTEITRTA- -VIV--MYVS I-LRNDVMRY -PPCHLTVSN LKGCC-ASVP AITEITRTV- -VIC--MYVS I-LCNVVMQY -PPCVLTVSN LKGCC-ASCP ANTEITRTV- -VIC--MYVS I-LRNVVMQY -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY 
-PPCHLTVSN LKGVC-ASCP DITEITRTV- -VIC--MYVS I-LRNDVMQY -PPCHLTVSN LKGVC-ASCP DITEITRTV- -VIC--MYIS I-LRNDVMQY -PPCHLTVSN LKGVC-ASCP DITEITRTV- -VIC--MYIS I-LRNDVMQY NPPCHLTVSN LKGVC-ASCP AITEITRTV- -VIV--MWIS I-LRNDVMQY NPPCHLTVSN LKGVC-ASCP AITEITRTV- -VIV--MWIS I-LRNDVMQY -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNDVMQY -PPCHLTVSN LKGVC-ASVP AITEITRTV- -VIC--MYIS I-LRNNVMQY -PPCHLTVSN LKGVC-ASVP AITEITRTV- -VIC--MYIS I-LRNNVMQY -PPCHLTVSN LKGVC-ASCP AITEITRTV- -VIC--MYVS I-LRNVVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IKIY--MWGT INFKNRVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IKIY--MWGT ICFKNRVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY -PPCMLTVSG LKDTM-AGMK QETEITKTPD IEIY--MWGT INFKNQVMQY -PPCVLTVSG LKDTM-AGMK HGTEITKTPD IKIH--MWGT INFKNKVMQY -PPCVLTVSG LKDTM-AGMK HGTEITKTPD IKIH--MWGT INFKNKVMQY -PPCILTVSG LKRTV-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTV-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY -PPCDLTVSG LKRTV-AGPP DETEITKSPV IHIV--MWAD IVFKNCVMQY -PPCILTVSG LKRTI-AGAP DETEITKSPV IHIV--MWAD IMFKNCVMQY -PPWVLTVSG LKRTV-AGAP DETEITKSPI IHII--MWEE IMFKNCVMQY -PPWVLTVSG LKRTV-AGAP DETEITKSPI IHII--MWED IMFKNCVMQY -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-ADEP DETEITKTPI IHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-AGSP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCVLTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCILTVSG LKRTI-AGAP DETEITKTLI YHIV--MWAD IMFKNCVMQY -PPCILTVSG LKETK-AGLI DGTEITKTPD IGIC--MWET IHFKNPVMQY -PPCILTVSG LKETK-AGII DGTEITKTPE IGIC--MWDT IEFKNPVMQY -PPCILTVSG LKETR-AGII EGTEITKTPD IGIC--MWDT IHFKNPVMQY -PPCILTVSG LKETR-AGII DGTEITKTPD IGIC--MWDT IHFKNPVMQY -PPCLLTVSG 
LKETV-AGII DGTEITKTPE IGIC--MWDT IHFKNPVMQY -PPCLLTVSG LKETV-AGII DGTEITKTPE IGIC--MWDT IHFKNPVMQY -PPCILTVSG LKETV-AGVI DGTEITKTPE IGIC--MWDT IHFKNPVMQY -PPCILTVSG LKETM-AGVI DGTEITKTPD IGIG--MWDS IHFKNPVMQY -PPCVLTVSG LKQPM-AGYN DQTEITKTPD ICIC--TWGT IHFKNFVMQY -PPCILTVSG LKQPM-AGYN DETQITKTPD ICIC--TWGT IHFKNSVMQY -PPCILTVSG LKQPM-AGYN DETQITKTPD ICIC--GVGT IHFKNLVMQY -PPCILTVSG LKQPV-AGYN DETQITKTPD ICIC--TWGT IHFKNSVMQY -PPCILTVSG LKQPM-AGYN METQITKTPD ICIC--TWGT IHFKNSVMQY -PPCILTVSG LKQPM-AGYN DETEITKTPD ICIC--TWGT IHFKNSVMQY -PPCILTVSG LKQPM-AGFV DETEITKTPD ICIC--TWGT IHFKNSVVQY -PPCILTVSG LKQPM-AGFV DETEITKTPD ICIC--TWGT IHFKNSVVQY -PPCILTVSG LKQSV-SGYN DETEITKTPD IKIC--TWGT IHFKNSVMQY -PPCILTVSG LKRAV-NGYN DETEITKTPD IKIC--TWGT IHFKNSVMQY -PPCILTVSG LKQAV-NDCN DETEITKTPD IKIC--TWPT IHFKNGMMQY -PPCMLTVSG LKQAM-AGVN DETEITKTPD IKIC--TWGT IHFKNFVMQY -PPCMLTVSG LKQAM-AGVN DETEITKTPD IKIC--TWGT IHFKNFVMQY -PPRSLTVSG LKDTM-PGHP DVTEITKTPV IAIV--MWGT INLKNHVMQY -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY -VHCLLTHAG LKSME-IGFD R-TGVVRMPL IAEP--LYLV I-YVNAVEPY -VVCLLTHCG LKAME-IPFD I-TGVVRGPL ITEP--LYLI I-YVNAVEPY -VHCLLTHCG LKAME-IPFD R-TGVVRGPL IAEP--LYLI I-YVNAVEPY CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- CLYYEENKSI R--------- --QNNTGSAD LPK------- ---------- CLYYEENKSI R--------- --QNNTGSAV LPK------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- VIYYKERKSI R--------- --QNNTGCAK LPQ------- ---------- MVYYKERKSI R--------- --QNNTGCAN LPQ------- ---------- MVYYKERKSI R--------- --QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- 
--QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- CVYYKEHKSI R--------- --QNNTGCAN LPQ------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --LNNTGRAE LPK------- ---------- CFYYKENRSV R--------- --INNTGQAE LPK------- ---------- VFYYRENRSV R--------- --QNNTGNAE LPK------- ---------- VFYYRENRSV R--------- --QNNTGNAE LPK------- ---------- VFYYRENRIV R--------- --QNNTGVAE LPK------- ---------- VFYYRENRIV R--------- --QNNTGVAE LPK------- ---------- VFYYRENRIV R--------- --QNNTGVAE LPK------- ---------- VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- VFYYRENRIA R--------- --QNNTGVAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGAAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VVYYKENRSV R--------- --QNNTGKAE LPR------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPR------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE LPK------- ---------- VFYYKENRSV R--------- --QNNTGKAE 
LPK------- ---------- CNYYKENRSV R--------- --INNTGLAE LPK------- ---------- CNYYKENRSV R--------- --INNTGLAE LPK------- ---------- CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- CNYYKENRSV R--------- --INNTGGAE LPK------- ---------- CTYYKENRSV R--------- --INNTGGAE LPK------- ---------- CTYYKENRSV R--------- --INNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CTYYKENRSV R--------- --ANNTGGAE LPK------- ---------- CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CNYYKENRSV R--------- --VNNTGGAE LPK------- ---------- STYYKENRSV R--------- --ANNTGGAE LPK------- ---------- STYYKENRSV R--------- --GNNTGGAE LPK------- ---------- STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- STYYKENRSV R--------- --VNNTGGAE LPK------- ---------- ATYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAA LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKEPRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CVYYKENRSV R--------- --VNNTGGAE LPQ------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- 
---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --VNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --LNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --FNNTGGAE LPK------- ---------- CCYYKENRSV R--------- --FNNTGGAE LPK------- ---------- CVYYKENRSV R--------- --FNNTGGAE LPK------- ---------- CVYYKENRSV R--------- --FNNTGGAE LPK------- ---------- CCYYKETRSV R--------- --VNNTGGAE LPK------- ---------- TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPVGATHLTH VCVCVPHPWT TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPMGATHLTH VCVCVPHPWT TDAYKKPKSM HQFQFDDCQI RYRTNTGFEE TPMGATHLTH VCVCVPHPWT TDAYKKPKSM HQFQFDDVQI RYRTNTGFEE NPKLATHLVH VCVVVPHPWT TDAYKKPKSM HQFQFDDVQI RYRTNTGFEE NPKLATHLVH VCVVVPHPWT ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE ----DRLVNS FVD-ELYNSI ---YTAAPKK K--RHL-GIG DNGGMELVRE ----DRLVNN FVD-ELYNSV ---YTAAPKK K--RHL-GIG DNGGMELVRE ----ARLVNK VVD-ELYNNV ---YHAIPKK E--MNQ-GVG DSGGVEMVQE ----ARLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG DSGGVEMVQE ----ARLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG DSGGVEMVQE ----HRLVNK VVD-ELYNNV ---YHAIPKK D--MNQ-GVG DSGGVEMVQE ----HRLVNK VVD-ELYNNP ---YHAIPKK D--MNQ-GVG DSGGIEMVQE ----HRLVNK VVD-ELYNNP ---YHAIPKK D--MNQ-GVG DSGGIEMVQE ----HRLVNK VVD-ELYNNV ---YHAIPKK D--INV-GVG DSGGMEVVQE ----HRLVVK VAD-ELYNNI ---YHAIPKK D--GVLLGIG DSGGMEMVQE ----HRLVVK VAD-ELYNNI ---YHAIPKK D--GVLLGIG DSGGMEMAQE ----HRLVDQ VVD-ELYSKF ---YHAIPKK P--GNM-GVG DSGGMEMVQE ----HRLVDQ VVD-ELYSKF ---YHAIPKK P--GNM-GVG DSGGMEMVQE ----HRLVDK VVD-ELYSKF ---YHAIPKK P--GNM-GIG DSGGMEMVQE ----HRLVDR VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME ----HRLVDK VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME ----HRLVDK VVD-ELYSKF ---YHAIPKK P--GNM-DVG DSGGMEMVME ----HHMQNI FID-ELYPGG ---YHAAPKK E--EMV-GPG DNGGVEDIKE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE 
----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-SPG DNGGVEAIRE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE ----HHMQNI FID-ELYPGP ---YHAAPKK E--ELM-APG DNGGVEAIRE ----HHVQNV FID-ELYPGH ---YHAAPKK N--ENV-GPG DNGGVEGIKE ----HHMQDV FID-ELYLGP ---YHASPKK D--RFC-NPG DNGGMETYQE ----HHMQDV FID-ELYLGP ---YHASPKK D--RFC-NPG DNGGMDTYQE ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NVG DNGGMETIQE ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NPG DNGGMETIQE ----HHMQDV FID-ELYLGP ---YHASPKK E--RFC-NPG DNGGMETIQE ----HHMQDI FID-ELYLGP ---YHASPKK Q--RFC-NPG ENGGMETIRE ----HHMQDI FID-ELYLGP ---YHASPKK E--RFC-NPG ENGGMETIRE ----HHMQDI FID-ELYLGP ---YHASPKK E--RFC-NPG ENGGMETIRE ----HHMQDI FID-ELYPGQ ---YHASPKK K--KIC-GPG DNGGMETIEE ----HHMQDI FID-ELYPGL ---YHASPKK K--KVC-GPG DNGGMETIQE ----HHMQDI VID-ELYPGP ---YHATPKK H--RFC-GTG DNGGMQAIQE ----HHMQDI FID-ELYPGP ---YHATPKK H--RFC-GTG DNGGMQAIQE ----HHMQDI FID-ELYPGP ---DHATPKK H--RFC-GTG DNGGMQTIQE ----HHMQDI FID-ELYPGH ---YHATPKK N--RYC-GPG DNGGMQVIEE ----HHMQDI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQVIEE ----HHMQDI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQVIEE ----HPMQEI FID-ELYPGH ---YHATPKK N--RFC-GPG DNGGMQSMQE ----HPVQDI FID-ELYPGH ---YHATPKK V--RFC-GPG DNGGMQAIQE ----HHMQDI FID-ELYPGR ---YHATPKK N--RYC-GPG DNGGMQPIQE ----HHMQDI FID-ELYPGH ---YQATPKK D--KFC-GPG DNGGMQTIQE ----HHMQDI FID-ELYPGH ---YHATPKK D--KFC-GPG DNGGMQTIQE ----HHMQDI FID-ELYPGH ---YHATPKK N--RCC-GPG DNGGMQTVQE ----YHHQNI LID-ELYSNV ---YPAAPKK H--QYM-GVG DVGGYEVICE ----YHHQNI LID-ELYSNV ---YPAAPKK H--QYM-GVG DVGGYEMICE ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE ----YHHQNI LID-ELYNDI ---YPAAPKK H--KFL-GVG DVGGYEIICE ----YHHQNI LID-ELYNNI ---YPAAPKK H--KFL-GVG DVGGYEIICE ----YHHQNI LID-ELYSNI ---YPATPKK H--QYM-GVG DVGGYEVICE ----YHHQNI LID-ELYSNI ---YPATPKK H--QYM-GVG DVGGYEVICE ----YHQQNI 
LTD-ELYSDV ---YPAAQKK Y--QVM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDL ---YPAAPKK Y--QEM-GVG DIGGYELIVE ----YHQQNL LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELIVE ----YHQQNL LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELIVE ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QVM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDI ---YPAAPKK Y--QIM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRVM-GVG DIGGYEMICE ----YHQQNI LTD-ELYSDV ---YPTAPKK YVIRVM-GVG DIGGYELICE ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIRQM-GMG DIGGYELICE ----YHQQNI LTD-ELYSDM ---YPAAPKK YVIREM-GVG DIGGYELICE ----YHHQNT LVD-ELYSDV ---YPAAPKK K--VCM-GVG DVGGYEVMGE ----YHHQNT LVD-ELYSDI ---YPAAPKK K--VCM-GVG DVGGYEVMGE ----YHHQNT LVD-ELYSDI ---YPAAPKK K--VCM-GVG EVGGYEVMGE ----YHDQNT LVD-ELYSDV ---YPAAPKK K--VCM-GVG DVGGYEVMGE ----YHNQNT LVD-ELYSDV ---YPAAPKK Q--HYM-GVG DVGGYEVMGE ----YHNQNT LVD-ELYSDV ---YPAAPKK Q--HYM-GVG DVGGYEVMGE ----YHNFNA LVD-ELYSDV ---YPAAPKK K--HYM-GVG DVGGYEVMGE ----YHHQNS LVD-ELYSDV ---YPAAPKK K--HYM-GVG DVGGYEVMGE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDV ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNM LVD-ELYSDM ---YPAAPKK K--VYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNI LVD-ELYSDM ---YPAAPKK K--HYV-GAG DVGGYEVMSE ----YHHQNV LVD-ELYGEA ---YPAAPKK K--HYM-GAG DVGGYEVMSE ----YHHQNV LVD-ELYSEL ---YPAAPKK A--HYM-GAG DIGGYEVMSE ----YHHQNV LVD-ELYSEV ---YPAAPKK A--FYM-GAG DIGGYEVMSE ----YHHQNA LVD-ELYSDV ---YPAAPKK K--HYM-GEG DVGGYEVMSE ----YHHQNV LVD-ELYSDG ---YPAAPKK K--HYM-GEG DVGGYEVMSE ----YHYQNI LVD-ELYSND 
---YPAAPKK A--QFM-GVG DVGGYEIVCE KGKSEILQNM GSA-VLYNDV IRDHNASEQK E--AHV-PMG DEGRISRAKD KGKSEILQNM GSA-VLYNDV IRDHNASEQK E--AHV-PMG DQGRISRAKD KGKSEILQNM GSA-VLYNDV IRDYNASEQK E--AHV-PMG DQGRISRAKD KGKSEILQNM ESA-HLYNAV IRDNNASEQK E--AHV-PVG DEGRISRAKN KGKSEILQNM ESARHLYNAV IRDNNASHQK E--AHV-PAG DQGRISRAKN DIFQILVCV DIFQILVCV DIFQILVCV DIFQILVCV DVFQILVCI DVFQILVCI DVFQILVCI DVFQILVCI DVFQILVCI DVFQILVCI DVFQILVCI DLFQILVCA DLFQILVCV DVFQILVCL DVFQILVCL DLFQILVCI DVFQILVCV DVFQILVCV DVFQILVCA DMFQVLVCR DMFQVLVCK DMFQVLVCK DMFQVLVCK DMFQVLVCK DMFQVLVCK DMFQVLVCK DMFQVLVCK DMFQVLVCQ DMFQVLVCV DMFQVLVCV DMFQVLVCI DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVYV DMFQVLVYV DMFQVLVYV DVFQVLVCV DVFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV DMFQVLVCV NLFQILVVE NLFQILVVE NLFQILVVE NLFQILVVE NLFQILVVE NLFQILVVE NLFQILVVE NLFQILVVE DLFQILVCQ DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCQ DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVCE DLFQILVHE DLFQILVHE DLFQILVHE DLFQILVHE DLFQILVHG DLFQILVHA DLFQILVHE DLFQILVHE DLFQILVHE DLFQILVHE DLFQILVHK DLFQILVHA DLFQILVHA DIFQILVCD EIMYIRDLE EIMYIRDLE EIMYIRDLE EILYIRDLE EILYIRDLE bpp-seq-2.1.0/test/example.mase000644 000000 000000 00000363124 12147656566 016353 0ustar00rootroot000000 000000 ; t73 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDAN--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-S ESAMWIFDLWMCKLNHGMQR--------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSD KRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG FFDK-D-Q------SRPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANG L----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS---KLYVSQLDH----KTLSK 
-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDD NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV- LCNEVLNGLQWQLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLHW-EMDVSHRG -SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-ISDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSN VDIKIFDTCFRSAVYAVDNSWETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQY CLYYEENKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE DIFQILVCV ; t66 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPCSGNGTQVRNDVEDAN--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-S ESAMWIFDLWMCKLNHGMQR--------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSD KRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG FFDK-D-Q------SRPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANG L----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS---KLYVSQLDH----KTLSK -NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKRKNPQEMRG------NNVMSESLGDD NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV- LCNEVLNGLQWQLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLHW-EMDVSHRG -SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-ISDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSN VDIKIFDTCFRSAVYAVDNSWETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQY CLYYEENKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE DIFQILVCV ; t32 
LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDAN--CQEFVGI---RELGKH-YK-CMDGFHTVNNGAGEN-S ESAMWIFDLWMCKLNHGMQR--------GDIGRVKQDVQKFPKLKEGAPNCSSFVKPYFMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSD KRCLNGLHQGDHEESVH---HAPRI----MR---LIGH-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDG FFDK-D-Q------ARPDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMTEYANG L----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEGCKCRS---KLYVSQLDH----KTLSK -NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDD NLQD--MSGVPVTVCTSSVMVRKDMQD-SVDKRGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV- LCNDVLNLLVWQLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLHW-EMDVSHRG -SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKPM-ERSQLHS-TYSED----KKDQKVLIHAMK -RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAHFVVFL-LSDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSN VDIKIFDTCFRSAVYAVDNSWETLCDY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQY CLYYEENKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RHL-GIGDNGGMELVRE DIFQILVCV ; t75 LIVSEIRVMVRDE--VHKAMDEE-C------------LIARRVKPYSGNGNQIRNDIEDAN--GQEFVGI---RELGKH-YD-CMDGYHTVNNGAGEN-S ESAMWIFDLWMCKLNHGMQR--------GDIGRVQQDVHKFPKLKEGAPNCSSFVKPVFMGCDMFHRQVQNRGNHGLVLDDTWNEESGTFPYPQVHSRNE KRCINKLHQGDHNESCH---HNPRI----VR---RIGH-HLVIIVLVNRDMGRLNYIEPDALVPSPLRFGGDEWVPKPI------------------EDG FFDK-D-Q------ARPDVANA-IVPDR----DQWVGAHKEWGSSLCKVGLAVHDLR-NVSDLYRCVWMECVLGANHYQQVSRMKMVTPAYTHMTEYANG L----WHPFYKNPDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVAKPVLGDWEGCKCRSDLIKLYVSQLDH----KTLSK -NMGLY--MRNRTWLATSPLQ------IGCIFMLVGRKKNSEE-NYNKAADPTVIWFYTQPIVYV-ADVFGCTKGKNPQEMRK------NNVISESLGDD NLQD--MPGMPVTVCTSSVMVRKDMHQ-SVDKRGYTWHAKE-DHLCPDSFCKGEK---EEEPGGVRR-----------------RIVENIVKLLYIKDV- 
LCNQVLNLLQWQLCWSV-GDWESLVPQACWGAKKDLAVCAWKMELVPGL---NRNNENLAKIIYFGPDGH-------DEGNMQAKIVTLHW-EMDVSHRG -SSVDDNVV-VVMLAFQASFCDPWGHYIQGLGD-QYKLARPNT---ARQLSILV---VMHFSGGFRDKPI-ERSQLHC-TYSEN----KKDQKVVIRAMK -RNKGCDWHAGKD-MAGGPK-IIYVGMLFY-------AESNWAKQKAHFVVFL-ISDATRLLIHHVGCMIEYTFMEFACHPFFAELFMEHMVIRYQYYNN VDIKIFDTCFRGAVYAEDNSWETLCGY-EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIPIRISGSE-SENVVLQY CLYYEENKSIR-----------QNNTGSAVLPK---------------------DRLVNNFVD-ELYNSV---YTAAPKKK--RHL-GIGDNGGMELVRE DIFQILVCV ; t79 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDAA--RPDFVGT---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGVDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLHQGDHEESRH---HNARI----VR---RIGL-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------GDG FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHG L----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVDVCVMSVMVNKSMPQ-SHDKRGYTWQAKQ-DHLFPVNVVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNMVLSLLVWQLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFNINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYINN HDIKIKDTVFRLAVYAWDNFWETLCKC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKE--MNQ-GVGDSGGVEMVQE DVFQILVCI ; t27 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV 
KRCLNVLHQGDHEESRH---HNARI----VR---RIGM-NLVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADG FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHG L----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNCVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNIVLSLLVWQLC--H-GDWEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGAFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK -VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNN HDIKIKDTVFRLAVYAWENFWETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE DVFQILVCI ; t93 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLHQGDHEESRH---HNARI----VR---RIGM-NLVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADG FFDK-D-Q------ARPLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHG L----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNCVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNIVLSLLVWQLC--H-GDWEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGAFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK 
-VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNN HDIKIKDTVFRLAVYAWENFWETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE DVFQILVCI ; t59 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDVWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLHQGDHEESRH---HNARI----VR---RIGM-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECILGANVYHQISRMKMHGPTYSHVMEYAHG M----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNQAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNIDLSLLVWQLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFAELFMEHVYIRYQYVNN HDIKIKDTVFRLAVYAWENFWETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNV---YHAIPKKD--MNQ-GVGDSGGVEMVQE DVFQILVCI ; t7 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKKFSGSQGQVRNAIEDAA--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSMPMPYFLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLHQGDHEESRH---HNARI----VR---RIGM-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHG 
M----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNIVLSLLVWQLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNN HDIKIKDTVFRLAVYAWENFWETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNP---YHAIPKKD--MNQ-GVGDSGGIEMVQE DVFQILVCI ; t78 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQGQVRNAIEDAA--RPDFVGI---RELGKQ-YE-CMDGVGAVDTGAGDN-S ESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPYFLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLHQGDHEESRH---HNARI----VR---RIGM-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADG FFDK-D-Q------ARPLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVMEYAHG M----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDGCRCRSDLNNLILSQLDD----KTLSK -NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNRAGDPTTICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDD SLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDKRGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV- DCNIVLSLLVWQLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLHW-EMEASHRE -SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKSA-ERSQLHS-RYSEH----KNDHKCVILASK -HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNHFVIFL-ICDAVRILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNN HDIKIKDTVFRLAVYAWENFWETLCNC-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMY 
VIYYKERKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNP---YHAIPKKD--MNQ-GVGDSGGIEMVQE DVFQILVCI ; t12 LIVSEVRVMVRDG--INIAIDEL-C------------LIANRVKAFSGHQNQVRNAMEDAQ--RPDFVGI---RELGKQ-YQ-CMDGHGAVDTGAGRN-S ESAVWIFDIWMCKLAHGMQW--------GDIGRVSVDHPKFLKLEEGAPRCSSCPMPYFLGCEMFHRQVILKGAKGLVLPD--NEDSMAFPYVQVHSRSV KRCLNVLVQGDHDESRH---HNARI----VA---RIGM-NMVIIRYLNVEMGRLNHCEDDAMVYSPLRIGGAGTHSKPI------------------ADG FFDK-D-Q------ARPLIANAHIVPEN----DQWTGAVAQWCSSVIKCGLANHDLE-NISRLLRCVVMECVLGANVYHQASRMKMHGPYYAHVTEYAHG L----WKPFYQTSDHKNEAQGVGES-THQLPCVESQTCQYEVKHAKVCKLVHH-ALYLLALHTMMKPRLGDIDGCRCRSELNKLALSQLDD----KTLSK -NCFQY--LGNMTWLATSPLH------VGCMIIF------------------------------------------------V------NNVLSQSLGDD SLQQ--MHAMPVTVVVVSHMVKKSMPQ-SHDKRGYTWQAKDFDQLIPVSFVKGEQ---EEEPEGPNN-----------------RVVHNIVKLLYTKDV- DCNTVLSLLIWQLC--H-GDWEQLVPQACAGARSDLAVCAWKRELVPGL---NHNNENLAKVIYFGPDGH-------DEGPMGRKIVDLHW-EMEASHRD -SSVRDNML-VIMLPFLVNNLDPWRHYILGLND-QIQLTRPNT---ARRLSILM---VMHYSGEFHDKSP-ERSQLHFDSYSEH----KNDHKCVILASK -HNKGLDWATGKDDMKGGAK-IMMSGALFY-------AEQNWIRARNHFVIFL-ICDAVRILIVLHGCMTAWTFMEFACNPFFAELFMEHVYIRYQYVNN HDIKIKDTEFRLAVYAWENSWETLCNC-EMLSGHIGAKINHNPRLFEQVC-PPCELTVSGLKGKH-VGWPYLTDITRTPLIRIQIREPGSE-CENSVVMY VIYYKERKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNV---YHAIPKKD--INV-GVGDSGGMEVVQE DVFQILVCI ; t81 LIVSEVRHMVRDG--ANVAIDEL-C------------LIACRVKAFSGHGHQVRNAVEDAA--RPDFIGI---RELGKP-Y--CMDGHGAVNTGAGHN-S ESAVWIFDIWMYKLSHGMQW--------GDIGRVGVDHPKFLKLEEGAPNCSSLPMPYFLGVDMFHRQVTIKGAKGLVLPDIWNESPMAFPYAQVHSKSH KRCLNVLVQGDHEESEH---HNARI----MS---KIGT-HLVIINIINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADG FFDK-D-Q------ARPIIANAHIVPVN----DQWTGPMAQWCSSVIKCGLANHELE-DVSRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMTEYAVG L----WKPFYQASEHKNEAQGLGER-PHQLPCVESQTCQYEIKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDGIRCRSELNKLYLSQLDD----KTLSK -NCFIY--LGNMTWLATSPLQ------IGWLVLFDGRKRQSAE-DYNRAADPTTISFCRKPIVQH-ADVFGCDRDKDPQEMRT------NNVISESLGDD 
FLQQ--MPAVPVSVCVYSHMVKKFVPQ-SHEKRGYTWKAKE-DHLVPISYCKGEH---EEEPEGAQY-----------------RVVQNIVKLLYTKDV- DCNVVLSLLVWQLC--H-GDWEPLIPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEGPMGRKIVDLHW-EMEASHRD -SSVRDNYI-VIMLPFKINSRDPWRHYILGIND-QVCLNRPET---ARRLSILA---VMHYSGEFHDKSP-ERSHLHF-RYSEM----KNDVKCVI---K -SNKGLDWTAGKDGMKGEAK-AMMNGALFY-------AEQNWFRKKNHFVIFV-FCDAVRILIMHSGVMIGYTFMEFACNPFFAELFMEHVMVRYQYESN HDIKIKDTCFRLAVYAWENSWETLCNV-EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGLPYLTDITRTPLIRIQIRVPGSE-SENVVVMY MVYYKERKSIR-----------QNNTGCANLPQ---------------------HRLVVKVAD-ELYNNI---YHAIPKKD--GVLLGIGDSGGMEMVQE DLFQILVCA ; t21 LIVSEVRHMVRDG--ANIAIDEL-C------------LIACRVKAFSGHGNQVRNAVEDAP--RPDFIGV---RELGKP-Y--CMDGHGAVNTGAGVN-S ESAVWIFDIWMYKLSHGMQW--------GDIGRVDVDHPKFLKLEEGAPNCSSLPMPYFLGVDMFHKQVTLKGAKGLVLPDIWNESPMAFPYAQVHSKSH KRCLNVLVQGDHEESEH---HNARI----MS---RIGT-HLVIVNVINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADG FFDK-D-Q------ARPIIANAHIVPIY----DQWTGPMAQWCSSVIKCGLANHELK-VVSRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMTEYAHG L----WKPFYQASEHKNEAQGVGER-PHQLPCVESQKCQYEVKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDGVRCRSDLNKLYLSQLDD----KTLSK -NCFRY--LGNMTWLATSPLQ------NGCVVIFAGRKRQSAE-DYNRAADPTTIAFCRKPIVQH-ADVFGCDKDKDPQEMRM------NNVISESLGDD FLQQ--MPAVPVSVCVHSHMVKKFVPQ-SHEKRGVTWKAKE-DHLVPISLCKGEH---EEEPEGAQY-----------------RCVQNIVKLLYTKDV- DCNVVLSLLVWQLC--H-GDWEPLVPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEGPMGRKIIQLHW-EMEASHRD -SSVRDNII-VIMLPFKINSRDPWRHYILGLND-QVCLTRPET---ARRLSILV---VMHYSGEFHDKSP-ERSHLHY-RYSEV----KNDVKCVILASK -SNKGLDWTAGKDGMKGENK-AMMNGTLFY-------AEQNWFRQKNHFVIFV-ICDAVRILIVHSGVMTGFTFMEFACNPFFAELFMEHVYIRYQYQAN HDIKIKDTCFRLAVYAWENSWETLCNV-EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGWPYLTDITRTPLIRIQIRVPGSE-SENITVMY MVYYKERKSIR-----------QNNTGCANLPQ---------------------HRLVVKVAD-ELYNNI---YHAIPKKD--GVLLGIGDSGGMEMAQE DLFQILVCV ; t80 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELEKV-YE-CMDGQGAVNTEAGNN-S 
ESAVWIFDIWMCKLTHGMQDFGD-----GDIGRVVVDHPKFEKLEEGAPNCSSHPVPYFLGIDMFHKQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDVEES-H---HSARC----VR---RIGF-HLVIIVFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDG FFDK-D-Q------ARPVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMTEYAHG L----WKPFYQASDHKNDAQGVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQGCRCRSQLSKLVLSQLDE----KTLSK -NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQSHGLNYNRAADPTTICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDD RLQQ--MPAMPVTICVSSYMVKKSVIQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DEEPDAAQQ-----------------RIVQNIVKLLYTKDV- SCNVVLSMLIWQIC--H-GDWEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEGPMGRKIVTLHV-EMEVSHRE -SSVKDNMIRVIMLPFSNSSVDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VTHYSGGFHDKSA-ERSQHHF-IYNEV----KDDHKCVILASK -CNKGLDWAAGKD-MKGGAK-VMMNGALFY-------AERNWIRQKHHFVIFV-VCDAVRILIVHYGCMIGYTFMDFACNPFFAELFMDHVFIRYVYMNN HDIKIMDTCFRVAVYTWENSWETCFNR-EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIKIHIRVPGSQ-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDQVVD-ELYSKF---YHAIPKKP--GNM-GVGDSGGMEMVQE DVFQILVCL ; t14 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELEKV-YQ-CMDGQGAVNTEAGNN-S ESAVWIFDIWMCKLTHGMQDFGD-----GDIGRVVCDHPKFEKLEEGAPNCSSHPMPYFLGVDMFHEQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDVEES-H---HSARC----VR---RIGF-HLVIIVYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDG FFDK-D-Q------ARPVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMTEYAHG L----WKPFYQASDHKSDAQGVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQGCRCRSQLSKLVLSQLDE----KTLSK -NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQSHGLNYNRAADPTTICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDD RLQQ--MPAMPVTICVSSYMVKKSVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DEEPDAAQQ-----------------RIVQNIVKLLYTKDV- SCNVVLSMLIWQIC--H-GDWEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEGPMGRKIVTLHV-EMEVSHRE 
-SSVKDNMIRVIMLPFPSSSVDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VTHYSGGFHDKSA-ERSQHHF-IYNEV----KDDHKCVILASK -CNKGLDWAAGKD-MKGGAK-IMMNGALFY-------AERNWIRQKHHFVIFV-VCDAVRILIVHYRCMIGYTFMDFACNPFFAELFMDHVFIRYQYMNN HDIKIMDTCFRVAVYTWENSWETCCNR-EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTEITRTPLIKIHIRVPGSQ-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDQVVD-ELYSKF---YHAIPKKP--GNM-GVGDSGGMEMVQE DVFQILVCL ; t85 LIVSEVRHVVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELDKA-YQ-CMDGQKAVNTEAGNN-S ESAVWIFDICMCKLTHGMQD--------GDIGRVVCDHPKFEKLEEGAPNCSSIPMPYFLGIDMFHKQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDHEES-H---VSARC----MR---RIGF-HLVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------NDG FFDK-D-Q------ARPVIANAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVEIDCNQGANHYVQISRMKMYGPTYSHMTEYAHG L----WKPFYQASDHKNDAQGVGER-PHQLPCVESQTCQYEVKHANVCKLVHA-ALYLIGLKTVVKPELGVWEGCRCRSQLSKLALSQLDE----KTLSK -NCSIY--LGNMTWLATSPLQ------IGCIILFGGRKRQSHGLNYNRAADPTMICFYKKPIVVQGADIFGCARGKDPQEMKS------NNVISGSLGDD RLQH--MPAMPVTICVFSYMVKKAVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---DEEPDGAQQ-----------------RVVQNIVKLLYTKDV- SCNKVLSMLIWQIC--H-GDWEPQIPKACNAARKDLAVCAWKRELVPGL---NHNNEDLAKIMYFGPDGH-------DEGPMGRKIVNLHV-EMEVSHRE -SSVKDNMIRVVMLPFDNNSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDKSS-ERSQHHF-IYNEV----KNDHKCVILASK -SNKGLDWAAGKD-MKGGAK-MMANGALFY-------EERNWIRQKNHFVIFV-VCDAVRILIVHYGCMVPYTFMAFACNPFFAELFMEHVFIRYQYVNN HDIKIMDTCFRVAVYTWENSWETCCNR-EMLSGTFGAKISHNPRLPEQVI-PPCDLTVSGLKGKH-NGWPYLTDITRTPLIKIHIRIPGSQ-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-GIGDSGGMEMVQE DLFQILVCI ; t62 LIVSEVRVMVRDG--IHIAVDEI-C------------LIANRVKCMSGQGNQARNAMEMAA--RQNFVGM---RELGKQ-YQ-CMDGQGAVNTEAGNN-S ESAVWIFDIWMCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHNQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDHEES-H---HSARC----MR---RIGY-HRVIINYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG 
FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMTEYAHG L----WKPFYQASDHKNDAQGVGER-PHQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQTVVKPKLGVWEGVQCRSQLSKLILSQLDD----KTLSK -NCSIY--LGNMTWLATSPLQ------IGCIIMFDGRKRQSHGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD RLQQ--MPAMPVTICVFSYMVKKSVPQ-SHVKRGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAQH-----------------RMVQNIVKLLYTKDV- SCNVVLSMLIWQIC--H-GDWEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLHV-EMEASHRE -SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-KIKLTRPNT---AHRLSILV---VMHYSGGFHDKSA-ERSQHHF-IYNEI----KNDHKCVILASK -ANKPLDWAAGKD-MKGGAK-MMANGALFY-------AERNWIRQKNHFVIFI-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPN HDIKIMDTCFRVAVYTWENSWETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDRVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME DVFQILVCV ; t19 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKCVSGQGNQARNAMEMAA--RQNFVGM---RELGKQ-YQ-CMDGQGAVNTEAGNN-S ESAVWIFDIWMCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHKQVMGKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDHEES-H---HSARC----MR---RIGYGHLVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMTEYAHG L----WKPFYQASDHKNDAQGVGER-THQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQNVVKPKLGVWEGVQCRSQLSKLILSQLDD----KTLSK -NCSIY--LGNMTWLATSPLQ------IGCIILFDGRKRQSHGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD RLQQ--MPAMPVTICVFSYMVKKLVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAKQ-----------------RMVQNIVKLLYTKDV- SCNVVLSMLIWQIC--H-GDWEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLHV-EMEASHRE -SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDKSA-ERSQHHF-IYNEI----KNDHKCVILASK -ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNHFVIFV-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPN 
HDIKIMDTCFRVAVYTWENSWETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME DVFQILVCV ; t77 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKSMSGQGNQARNAMEMAA--RQNFVGM---RELGKQ-YH-CMDGQGTVNTEAGNN-S ESAVWIFDIWLCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPYFLGIDMFHRQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSH RRVLNVLHQGDHEES-H---HSARC----MR---RIGY-VLVIINFWNVEMGRLIHCEDEAIIYSPLRIGGAGFHSKPI------------------ADG FFDK-D-Q------ARPVIANAHIVPEY----DQWSGAMPQWVSSVIKCGLANHDLE-NVSRLLRCVVIDVNQGANHYVQISRMKMHGPTYSHMTEYAHG L----WKPFYQASDHKNDAQGVGER-VHQLPCVEPQTCQYEVKHAKVCKHVHG-ALYLIGLQTVVKPKLGVWEGCQCRSQLSKLILSQLDD----KTLSK -NCNIY--LGNMTWLATFPLQ------IGCIILFDGRKRQSYGLQYNRAADPTTICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDD RLQQ--MPAMPVTICVFSYMVKKSVPQ-SHQKRGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAQQ-----------------RIVQNIVKLLYTKDI- SCNVVLSMLIWQIC--H-GDWEPQLPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLHV-EMEASHRE -SSVKDNMIRVIMLPFGENSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDNSA-ERSQHHF-IYNEV----KNDHKCVILASK -ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNHFVIFV-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVVIRYQYIPN HDIKIMDTCFRVAVYTWENSWETCCNR-EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMY CVYYKEHKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GNM-DVGDSGGMEMVME DVFQILVCA ; t88 LIVSECRLIIRDG--NHDAIDEM-CCVANDLNNEIERLVASMVKSFRGHDSQARNNSECMR--SAPFIGV---RELFKR-YHKCVEGAGCVHTVAGTP-S DSPVWMFDQFMCQLTHSMVD--------GDLGRVVQDNVIFAKLKEGAPHCISL--PHFLGIDMFHTQVFVGGH--LILPDPCYELSISVMYAGHASYNQ KRCINNLDQGDQEDSNHRKEHKIRASVLLYR---QIGI-L-VIIKEANELMNRLNHKEPENGIIFPLR---DAQDPKQI------------------LNG LFDK-E-E------NRPMVQDADSVVGS----AQWAGQHRSWCSSDDKA-----------SQLPRNTHIVVEIGANVYEQFSRMKTNIPIYAHVTEYAVG V----ERPFYE-SEFKNEAQGWGES-GTSIPCVDSPDVQYEMKVAWVDKLMHT-ALYLMPLATVHKPEMGTVRGERCRAIL-KLLMMQLD------TLSR 
-NQLPK--LCQGTWLDASPLQ------IGVQVMLVGKKGGSKK-EYELAADQVIIYFYQAPIIYVKADVFSGTVAKKAQAMR-------KSTGSQSIGDD GMQS--MPLMQNAVCVWSKMVRKVQPD-GQDKREQTWMAKD-DTLCPPSMKRGEK---TAEPTQWMG-----------------TVTVNKIKLLYCKDC- SCNEVMKILSWWLCNSV-GDWQTLMSQACITADPNPPVCVWKRELVPGL---NRSVENLAKIIYFCPDEH-------DERKMWGKIFALEW-EMDISHRH -SSVDDNHC-VEMLPFMCQRVDPWGHYVQILAD-RQDLARPVT---LQALSILP---CPHASGKEQDGAV-ERSQHYV-VYAEL----QVDHKCTIAAHK YQDKMKDWDTGKN-MDEGAK-MYVEIHLFY-------AENNIRRQNDHSVIFW-ISDAKKRLICGMSSMCLVTFMTLACNPLFAKLFMEVVPMRYDYLTN HDIKI------MEVYAPENSWETIKNV-EMVSGKEGAEINHTPQLPEQVI-PPCNLTVSNLKGMI-PPCPALTEITRTV--FIV--MYYAI-LRNKIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGG---YHAAPKKE--EMV-GPGDNGGVEDIKE DMFQVLVCR ; t37 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVG V----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRH -SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQKRLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE DMFQVLVCK ; t35 
LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTVIPIYARVTEYAVG V----PRPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPQMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRH -SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQKRLICGVSSMAMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE DMFQVLVCK ; t48 LIVSDMRLIIREG--SDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEVGANVYEQYSRMKTVIPIYARVTEYAVG V----PRPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- 
SCNEVLKILAWWLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRH -SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLIKQNEHSVTFV-ISDAQKRLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE DMFQVLVCK ; t55 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAI--PYFMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVG V----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLSTVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRH -SSVDDNQC-VELLPYNCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEHSVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-SPGDNGGVEAIRE DMFQVLVCK ; t46 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQ 
KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVG V----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVKVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWMAKD-DALCPPCKEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQSLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRH -SSVDDNQC-VELLPYVCQRMDPWGHYVQVLAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDHKSTIDANK NDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEHSVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPDNSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIM--MYVAM-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE DMFQVLVCK ; t67 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVMDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFLGGD--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIGL-L-VIIKEANELMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVG V----ARPFYD-AEFKNEAQRQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDD GMQN--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWVAKD-DTLCPPCKEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRM -SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLVLTRPVT---LHGLSILP---CPHASGKEQDGAE-ERSQHYG-VYQEL----QNDAKSTIDANK 
NPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKLNEHSVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE DMFQVLVCK ; t57 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR-YPKCMEGARCVHTVAGPP-S ESPVWMFDKFMCQLTHSMVN--------GDLGRVVMDNWTFTKLKEGAPHCIAL--PYFMGIDMFHIQVFIGGD--LILPDPCYELSISVMYAGHASYNQ KRCINGLDNGDRQDSDPQKEHKIRNSVLLYR---QIGL-L-VIIKEANEIMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNG LFEK-E-E------NRPMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVTEYAVG V----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKGERCRTIL-KLLMVQLDE----KTLSQ -NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPVIIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDD GMQK--MPLMQNAVCVWSQMVRKVQPD-GQDKREQTWVAKD-DTLCPPCKDQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC- SCNEVLKILAWWLCDSV-GDWQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALEW-EMDISHRV -SSVDDNQC-VELLPYLCQRMDPWGHYVQILAD-RLNLTRPVT---LQGLSILP---CPHASGKEQDGAV-ERSQHYG-VYQEL----QNDAKSTIDANK NPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEHSVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPN HDIKI------IEVYAPENSWETTKVV-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQY CFYYKENRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--ELM-APGDNGGVEAIRE DMFQVLVCK ; t56 LIVSEWRLFIRDG--HDDAIDEM-CCEANELNNDIEKLVASMVKGFRGHDSQARNNAECII--AAPFIGV---RELFKR-YIKCVEGAGCVYTVA-AP-S ESPVWMFDKFMCHLTHSMVV--------GDLGRVLRDNTTFAKLKEGAPVCISL--PYFLGMDMFHQQVFMGGN--LILPDPCYELSISVMYAGHASYNQ KRCLNNLDQGDREDSSHRKEHKIRRSVLLYQ---QIGC-L-VITRERNELMGRLNHKEPRDGVIFPHR---GAQGPKQL------------------ANG LFEK-E-E------NRPMVQDAGSVPER----AQWPGQQRAWCSSDDKA-----------SKLPRNTKMVAEIGANVYEQASRMKTNIPIYAHVTEYALG 
V----DRPFYD-SEFKNEAQGQGES-GTSIPCVDSPDVQYELKHAWVDKLMHT-ALYLMPLPTVHKPKMGTVKGERCRAML-KLNMMQLDE----KTLSQ -NQIIK--LCQRTWLDASPLQ------IGVNCVLPGKKGGSNK-DYELAADPVIIYFYQAPIIHHKADVFSGTVAKKAQAMRQ------QSTGSQSVGDD GTQV--MPLMQNLVCVWSKMVRKCMID-GQEKREQTWMAKD-DKLCPPSQEQGEK---AAEPTQWED-----------------ICTANVIKLLYCKDC- SCNEVLRVLSWWLCDSV-GDWRTLMSDACALANPNPPVCVWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERKMWGKIFCLEW-EMDISHRH -SSVDDNVC-VEMLPFVCQKMDPWGHYVQILAD-RLDLTRPVT---LQGLGILV---CPHASGKEQDGAM-ERSQHYV-VYAEL----QNDHKNTINANK VPRKMLDWDTGKN-MDKGAKGMYHEIVLFF-------AENNVKKQVEHSVIFI-IADAQKRLICGVSSMCLQTFMNLACNPFFAKLFMEVIPMRYDYQTN HDIKI------KEVYAPENSWETIKNV-EMVSGKAGKQINHEPQLPEQVI-PPCSLTVSNLKGMM-PPCPAMTEITRTV--III--MYYAI-LRNEIMQY CFYYKENRSVR-----------INNTGQAELPK---------------------HHVQNVFID-ELYPGH---YHAAPKKN--ENV-GPGDNGGVEGIKE DMFQVLVCQ ; t13 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECAI--PPPFHGV---REMFKRVYE-CMEGIGCVNTVAGNP-S ESSAWMFDKVMYQLTHSMVW--------GDLRRVVHDNVTFSKLKEGAPHCISH--PYFLGIDMFHIQVYSKGF--LTLPDPRYEISMSVMYSQHHSFSM KRCLNGLDHGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKDANSMMGRLNHHESRAVVAFPLR---GAEGPKQV------------------MEG LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-NEWKQEAQGQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKGNRCRAIL-KLSMIQLDQ----KTLSQ -NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVGSKK-DYEVAADPVIMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKD-DTLCPQSRDQGEK---HHEPHHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMNKIFALEW-EMDMSHRV -SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTRPHT---LRDLYILAC--CPVASGKEQDGNK-ERSQHYT-IYAEI----QNDHKNPISANN QPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSN HSIKI------FEVYADENSWENVRNF-EMVSGRAGAHINHIPQLPEQAY-PPCKLTVSNLKGVA-ASCPAITEITRTA--VIV--MYGSI-LRNDVMQY 
VFYYRENRSVR-----------QNNTGNAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKD--RFC-NPGDNGGMETYQE DMFQVLVCV ; t5 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECAI--PPPFHGV---REMFKRVYE-CMEGIGCVNTVAGNP-S ESSAWMFDKVMYQLTHSMVW--------GDLRRVVHDNVTFSKLKEGAPHCISH--PYFLGIDMFHIQVYSKGF--LTLPDPRYEISMSVMYSQHHSFSM KRCLNGLDHGDREESPHQIEHKMRKSVLIYS---PIGY-L-VIIKDANSMMGRLNHHESRAIVAFPLR---GAEGPKQV------------------MEG LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-NEWKQEAQGQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKGNRCRAIL-KLSMIQLDQ----KTLSQ -NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVGSKK-DYEVAADPVIMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKD-DTLCPQSRDQGEK---HHEPHHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMNKIFALEW-EMDMSHRV -SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTRPHT---LRDLYILAC--CPVASGKEQDGNK-ERSQHYT-IYAEI----QNDHKNPISANN QPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSN HSIKI------FEVYADENSWENVRNF-EMVSGRAGAHINHIPQLPEQAY-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYGSI-LRNDVMQY VFYYRENRSVR-----------QNNTGNAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKD--RFC-NPGDNGGMDTYQE DMFQVLVCV ; t38 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISQ--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHHIDHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHHESRLIVTFPLR---GAEGPKQV------------------MEG LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTIKGNRCRAIL-KLTMIQLDQ----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD 
DMLN--MPLTQNAMCVESEMVRKSQPD-GQDKRGYTWVAKE-DDLCPQSGDQGEK---YAEPQHIRN-----------------QFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM -SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNKLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------FEVYADENSWENIRNF-EMVSGRAGAYINHIPQLPEQAV-PPCHLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQY VFYYRENRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NVGDNGGMETIQE DMFQVLVCI ; t33 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHYESRAIVTFPLR---GAEGPKQV------------------MEG LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SHLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHN-SLYLMHIDTCHKPAMGTVKGNRCRAIL-KLTMIQLDQ----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEPNTPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM -SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAK-ERSQCYT-IYAEL----QNDHKSLISANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------FEVYADENSWENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQY VFYYRENRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NPGDNGGMETIQE DMFQVLVCV ; t100 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S 
ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAIL-KLTMIQLDQ----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSEDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM -SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------FEVYADENSWENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQY VFYYRENRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RFC-NPGDNGGMETIQE DMFQVLVCV ; t34 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAIL-KLTMIQLDV----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM 
-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------LEVYADENSWENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRY VFYYRENRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKQ--RFC-NPGENGGMETIRE DMFQVLVCV ; t84 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCDDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAIL-KLTMIQLDV----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEHNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM -SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAK-ERSQCYT-IYAEL----QNDHKSLIAANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------LEVYADENSWENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRY VFYYRENRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKE--RFC-NPGENGGMETIRE DMFQVLVCV ; t92 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR-YE-CMEGIGCVNTVAGNP-S ESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PFFLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIGY-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEG 
LFLK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVTEYAVG I----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKGNRCRAIL-KLTMIQLDV----KTLSQ -NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPVIMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDD DMLN--MPLTQNAMCVESEMVRKCQPD-GQDKRGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALEW-EMDMSHRM -SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGAQ-ERSQCYT-IYAEL----QNDHKSLISANH VPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDHSVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSN HSIKI------LEVYADENSWENIRNF-EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRY VFYYRENRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKE--RFC-NPGENGGMETIRE DMFQVLVCV ; t3 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEQQARNNRECAT--SIPFLGV---RELWKR-YE-CMEGIGCVNTVAGKP-S QSSVWMFDRFMYKLTHSMVW--------GDLGRVYWDNLTFQKLKEGAPHCIAV--PNYLNIDMFHIQVFYKGP--LTLPDPHYELSMSVMYAQHHSFSQ KRCLNALDHGDREESPHQIEHKMRKSVLLYN---PIGY-L-VIIKNANSMMGRLNHHESCAIVMFPLR---GAEGPKQI------------------IEG LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SRLPRNDPVVHEVGANVYQQISRMKTSIPIYAHVTEYAVG I----IKPFYD-NEWKQEAQGQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLGTCHKPVMGTVKGDRCRAIL-KLSMIQLDQ----KTLSQ -NVRAR--LCQRTWLDTSPLQ------MGMVAVLLGKKLGSKK-DYEIAADPVIKYFYQAPIIMRRTDVFEGTDDQKAQAMRV------RSTASQSIGDD DMLI--MPLVQNAMCVKSEMVRKCQPD-GPDKRGYTWMAKD-DTLCPVSAGQGEK---YAEPHQIKN-----------------KFSVNVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWVTLASDACLSAEPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEGKMMPKIFKLAW-EMDMSHRA -SSVDDNYV-IEMLPFVCHRVGPWGHYAQVQAD-QQNLTRPHT---LADLFILS---CPVASGKEQDGAT-ERSQYYV-VYAEL----QNDHKSPISANK HPDKMLDWDTGKS-MDRGAA-MCREIKLFY-------AEPNVWKQNDHSVVFI-ISDAVNRLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYDYFSN 
HSIKI------FEVYAEDDSWENICNF-EMMSGRTGAQINHIPQLPEQVC-PPCHLTVSNLKGCC-ASVPAITEITRTV--VIC--MYVSI-LCNVVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGQ---YHASPKKK--KIC-GPGDNGGMETIEE DMFQVLVCV ; t22 GIVSECRVIIRDQ--SDDAIDER-Y------------LVASVVKRLSGCENQARNNRECAI--SIPFLGV---RELWKR-YE-CMEGIGCVNTVAGTP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVYVDNQTFSKLKEGAPHCISA--PYYLNIDMFHIQVFYKGS--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDYGDREESPHQIEHKMRKSVLLYY---TIGW-L-VIIKNADSMMGRLNHHESCAMVMFPLR---GAEGPKQI------------------MEG LFDK-E-E------ARPMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPTNDPIVPEVGANVYQQVSRMKTSIPIYAHVTEYAVG I----VKPFYD-NEWKQEAQGQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLKTCHKPVMGTVKGDRCRAIL-KLKMIQLDQ----KTLSQ -NQAAR--LCKRTWLDTSPLQ------MGMNVVLIGQKLGSKH-DYEIAADPVIMYFYQAPIIMRRTDVFEGTDDQKAQAMRL------RSTASRSIGDD DMLI--MPLVQNAMCVNPEMVRKCQPD-GQDKRGYTWMAKN-DTLCPVSAGQGEC---YAEPHQTCN-----------------KFRVNVIKLLYCKDC- SCNKVLKILKWQLCESV-DDWQTLASDACILAKPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEGKMMSKIFNLEW-EMDMSHRT -SSVDDNYV-IDMLPFICHRVGPWGHYAQVLAD-QINLTRPHT---LRDLFILA---CPVASGKEQDGAT-ERSQHYV-VYAEL----QNDHKSPISANK HPNKLLDWDTGKT-MDRGAA-MCHEIKLFH-------AEPNVWRQNDHSVMFI-ISDASNRLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYEYFSN HSIKI------FEVYAEENSWENVCNF-EMMSGRAGAQINHIPQLPEQVC-PPCVLTVSNLKGCC-ASCPANTEITRTV--VIC--MYVSI-LRNVVMQY VFYYKENRSVR-----------QNNTGAAELPK---------------------HHMQDIFID-ELYPGL---YHASPKKK--KVC-GPGDNGGMETIQE DMFQVLVCV ; t64 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAIT-SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHDQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L-VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEG HFDK-E-E------ARPIVQDAASVPGK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVTEYAVG I----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLPMIQLDQ----KTLSH 
-NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAPSKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD DMLK--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YAEPHVIGD-----------------KVSVNVIKLLYCKDC- SCNQVLKILVWQLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALEW-EMDMSHRV -SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGAN HDIKI------FEVYAAENSWENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIVID-ELYPGP---YHATPKKH--RFC-GTGDNGGMQAIQE DMFQVLVCV ; t18 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAIT-SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHEQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L-VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEG HFDK-E-E------ARPIVQDAASVPGK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVTEYAVG I----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLPMIQLDQ----KTLSH -NQPRR--LCQRTWLDASPLQ------VGMNIMLKGKKAPSKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD DMLK--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YAEPHVIGD-----------------KVSVNVIKLLYCKDC- SCNQVLKILIWQLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMFKIFALEW-EMDMSHRV -SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGAN HDIKI------FEVYAAENSWENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGP---YHATPKKH--RFC-GTGDNGGMQAIQE DMFQVLVCV ; t68 
GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAI--SPPFIGV---RELFKR-YG-CVEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNAHFSKLKEGAPHVISH--PYFLGIDMFHDQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIGF-L-VIIKNINSMMGRLNHYESRAVVGFPLR---GAENPKQI------------------MEG HFDK-E-E------ARPIVQDAASVPEK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQISRMKTNIPIYAHVTEYAVG I----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLPMIQLDQ----KTLSH -NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAPSKK-NYEIAADPVIMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDD DMLK--MPLRQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YAEPVVIGD-----------------KVSVNVIKLLYCKDC- SCNQVLKILVWQLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALEW-EMDMSHRV -SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK HLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDHSVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGAN HDIKI------FEVYAVENSWENARNF-EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGP---DHATPKKH--RFC-GTGDNGGMQTIQE DMFQVLVCV ; t28 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGV---RELFKR-YH-CMEGIGCVNTVAGMP-S ESSVWMFDQFMYKLTHSMIW--------GDLGRVFPDNATFSKLKEGAPHVISH--PYFLGIDMFHAQVFYRGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNPLDHGDRQESSHGIEHNMRSSVLLYN---PIGF-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------VEG HFDK-E-E------ARPIVKDAASVPGK----AQWVGRIRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQDSRMKTNIPIYAHVTEYACG I----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLSMIQLDE----KTLSQ -NQRPG--LCQRTWLDANPLQ------MGMNTMLVGKKTPSKK-DYEIAADPVIMYFYQAPIIMRRTDVFEGTNNKKSQAMRV------QSTSSQSIGDD DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGCTWMAKN-DTLCPQSGNQGEK---FAEPHQIRN-----------------KVSVNVIKLLYCKDC- 
SCNKMLKILVWQLCESV-DDWQTLASDPCVSAEPNSPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALEW-EMDMSHRI -SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLSD-QIVLTRPHT---LRDLFILA---CRVASGGEVDGNT-ERSQHYI-VYAEL----QNDHKSPISANK VLNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDHSVMFI-ISDAQNRLICGNSIMCGITFMAMACNPHFAKLFMEVACMRYDYFAN PDIKI------FEVYAAENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYVSI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RYC-GPGDNGGMQVIEE DMFQVLVYV ; t82 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGL---RELFKR-YV-CMEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNCTFSKLKEGAPHVISH--PYFLGIDMFHGQVVYRGS--LTLPDPRYELSMSVMYSQHHSFSQ KRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIGF-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEG HFDK-E-E------ARPIVQDAASVPGE----AQWVGHIRAWCSSDIKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTTEYACG I----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLNMIQLDQ----KTLSQ -NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTPSKK-DYEIAADPVIMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDD DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGYTWMAKH-DTLCPQSGNQGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEGKMMSKIYALEW-EMDMSHRV -SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTRPHT---LRDLFILA---CPVASGGEQDGNA-ERSQHYI-VYAEL----VNDHKSPISANK VLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDHSVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFAN PDIKI------FEVYADENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYISI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQVIEE DMFQVLVYV ; t41 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGL---RELFKR-YV-CMEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNCTFSKLKEGAPHVISH--PYFLGIDMFHGQVVYRGS--LTLPDPRYELSMSVMYSQHHSFSQ 
KRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIGF-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEG HFDK-E-E------ARPIVQDAASVPGE----AQWVGHIRAWCSSDIKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTTEYACG I----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRAIL-KLNMIQLDQ----KTLSQ -NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTPSKK-DYEIAADPVIMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDD DMLN--MPLVQNAMCVSSEMVRKCQPD-GQNKRGYTWMAKH-DTLCPQSGNQGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEGKMMSKIYALEW-EMDMSHRV -SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTRPHT---LRDLFILA---CPVASGGEQDGNA-ERSQHYI-VYAEL----VNDHKSPISANK VLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDHSVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFAN PDIKI------FEVYADENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYISI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQVIEE DMFQVLVYV ; t71 GIVSECRMIIRDQ--SDDAIDEMVV------------LVASMVKEMSGCENQARNNSECAI--SPQFIGV---RELFKR-YR-CMEGIGCVNTVAGAP-S ESSVWMFDRFMYKLTHSMVW--------GDLARVFHDNGTFAKLKEGAPHVISV--PYFLGIDMFHGQVFLRGS--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNHLDHGDRQESPHGIEHNMRKSVLLYN---PQGY-L-VIIKNVNSMMGRLNHHESRAVVTFPLR---GADNPRQI------------------MEG HFDK-E-E------ARPVVQDAPGVPGK----AQWVGKVRAWCSSDVKA-----------SQLPRNDTIVPEVGANVYQQVSRVKTHIPIYAHVTEYAVG I----VKPFYN-NEWKQEAQGQGEA-GASIPCVDSPDDQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKGGRCRASL-KLKMIQLDQ----KTLSQ -NQQRR--LCQRTWLDSSPLQ------MGMKIMLQGKKTPSKK-DYEIAADPAIMHFYRAPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDD DMLN--MPLHQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSPNQGEK---YAEPHQIRN-----------------KISVNVIKLLYCKDC- SCNQVLKILVWQLCESV-DDWQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALEW-EMDMSHRD -SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDLFILA---CPVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPISANK 
HMNKMLDWDTGKT-MDFEAA-MCREIKLFY-------AETNVWKINDHSVMFI-VSDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEGVCMRYDYSAN HDIKI------FEVYADENSWENVRNF-EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV--VIV--MWISI-LRNDVMQY VVYYKENRSVR-----------QNNTGKAELPR---------------------HPMQEIFID-ELYPGH---YHATPKKN--RFC-GPGDNGGMQSMQE DVFQVLVCV ; t94 GIVSECRMIIRDQ--SDDAIDEMVC------------LVASMVKELSGCENQARNNRECAI--SPPFTGV---RELFKR-YR-CMEGIGCVNTVAGIP-S ESSVWMFDRFMYRLTHSMVW--------GDLGRVFQDNATFSKLKEGAPHVISH--PYFLGIDMFHGQVFYRGS--LTLPDPRYELSMSVMYAQHHSMSQ KRCLNTLDHGDRQESPHGIEHNMRKSVLLYD---PIGY-L-VIIKNVNSMMGRLNHHESRAKVTFPLR---GADNPRQI------------------MEG HFDK-E-E------ARPVVQDAPGVPEQ----AQWVGKMRAWCSSDVKA-----------SQLPRNDTIVPEVGANVYQQVSRVKTNIPIYAHVTEYAVG I----VKPFYN-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKGGRCRASL-KLNMIQLDQ----KTLSQ -NQQRR--LCQRTWLDASPLQ------MGMKIMLQGKKIPSKK-DYEIAADPAIMYFYQATIIMRRTDVFEGTNNKKAQAMRM------RSTASQSIGDD DMLN--MPLHQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLYPQSANQGEK---YAEPHQIRN-----------------KISVNVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALEW-EMDMSHRD -SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-KLVLTRPHT---LRDLFILA---CYVASGGEQDGNT-ERSQHYI-VYAEL----QNDHKSPILANK VVNKMLDWDMGKT-MDFEAA-MCREIKLFY-------AETNVWKQNNHSVMFI-VSDAHNRLICGNSIMCALTFMAMACNPHFAKLFMEAVCMRYDYSAN HDIKI------VEVYADENSWENVRNF-EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV--VIV--MWISI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPR---------------------HPVQDIFID-ELYPGH---YHATPKKV--RFC-GPGDNGGMQAIQE DVFQVLVCV ; t99 GIVSEVRMIIRDE--SDDAIDEM-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-S ESSIWMFDRFMYKLTHSMVW--------GDLGQVFPDVSTFSKLKEGAPHVISQ--PYFLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIGY-L-VIIKNINSMMGRLNHHESRAVFAFPLR---GAENPKQI------------------MEG HFDK-EKE------ARPVVQDAPGVPGK----AQWVGRIRAWCSSDVKA-----------SELPRNDAIVPEVGANVYQQVSRMKTNIPIYAHATEYAVG 
I----VKPFYD-NEWKQEAQGQGET-GASIPCVDSPDVQYEMKHAEVKKLMHD-SLYLMHIETCHKPIMGTVKGDRCRATL-KLKMIQLDQ----KTLSQ -NQKRR--LCQRTWLDASPLQ------MGMKMMLQGKKTPSKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDD EMLN--MPLVQNAMCVPSEMVRKCQPD-GQDKRGYTWMAKD-DPLCPQSANVGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC- SCNKVLKILVWQLCDSVQDDWQPLTSDPCVNAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMNKIFALEW-EMDMSHRD SSSVDDNYI-IEMLPFVCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDIFILA---CPVASGGEQDGNT-ERSQHYI-VYAEL----QDDHKSPISANK HMRKMLDWDTGKT-MDREAA-VCREIKLFY-------AETNCWKQNDHSVMFI-VSDAQNRLICGSSIMCRLTFMAMAVNPHFAKLFMEDVCMRYDYSPN HDIKI------FEVYADENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGR---YHATPKKN--RYC-GPGDNGGMQPIQE DMFQVLVCV ; t40 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVYPDNPTFSKLKEGAPHVISH--PYFLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDRQESPHGIEHNMRKSVLLDS---SIGY-I-VIIKNLNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEG HFDK-E-E------ARPVVQDAPGVPGK----AQWVGRVRAWCSSEIKT-----------SELPRNDAIVVEVGANVYQQISRMKTDIPIYAHATEYAVG I----VKPVYD-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRATL-KLKMIQLDQ----KTLSQ -NQKKR--LCQCTWLDACPLQ------MGMKIMLQGKKTPSKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDD EMLN--MPQVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANVGEK---YAEPHQIRN-----------------KVSTNVIKLLYCKDC- SCNKVLKILVWQLCDSV-DDWQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALEW-EMDMSHRN -SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRNIFILA---CPVASGGEQDENT-ERSQHYI-VYAEL----QNDHKSPIHANK VMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDHSVMFIVISDAQNRLICGSSIMCRLTFMAMAVNPHFAKLFMEESCVRYDYSAN HDIKI------FEVYANENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV--VIC--MYISI-LRNNVMQY 
VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YQATPKKD--KFC-GPGDNGGMQTIQE DMFQVLVCV ; t90 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGIP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVYPDNPTFSKLKEGAPHVISH--PYFLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQ KRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIGY-I-VIIKNVNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEG HFDK-E-E------ARPVVQDAPGVPGK----AQWVGRVRAWCSSEIKT-----------SELPRNDAIVPEVGANVYQQISRMKTDIPIYAHATEYAVG I----VKPVYD-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKGDRCRATL-KLKMIQLDQ----KTLSQ -NQKKR--LCQCTWLDACPLQ------LGMKIMLQGKKTPSKK-DYEIAADPVIMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDD EMLN--MPLVQNAMCVSSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQSANQGEK---YAEPHQIRN-----------------KVSTNVIKLLYCKDC- SCNKVLKILVWQLCDSV-DDWQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALEW-EMDMSHRN -SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRNIFILA---CPVASGGEQDENT-ERSQHYI-VYAEL----QNDHKSPINANK VMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDHSVMFIVISDAQNRLICGSSIMCGLTFMAMAVNPHFAKLFMEESCVRYDYSRN HDIKI------FEVYANENSWENVRNF-EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV--VIC--MYISI-LRNNVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKD--KFC-GPGDNGGMQTIQE DMFQVLVCV ; t4 GIVSECRMIIRDE--PDDAIDEV-C------------LVASMVKELSGCENQARNNRECAI--SPPFIGV---RELFKR-YG-CMEGIGCVNTVAGMP-S ESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNATFSKLKEGAPHVISV--PYFLGIDMFHDQVFYRGS--LTLPDPRYEMSMSVMYAQHHSFSQ KRCLNALDYGDRQESPHGIEHNMRKNVLLDN---PIGH-L-VIIKNENSMMGRLNHHESRAVVTFPLR---GAEDPKQK------------------MEG SFDK-E-E------ARPVVQDAPGVPGR----AQWVGRVRPWCSSDVKA-----------SELPRNDAIWPEVGANVYQQISRMKTNIPIYPHVTEYAVG I----VKPFYL-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLWHLETCVKPIMGTVKGDRCRATL-KLKMIVLDP----KTLSQ -NQKRR--LCQRTWLDASPLQ------VGMKIMLQGKKTPSKK-DYEIAADPVIMYFYQSPIIMRHTDVFEGTNNKKAQAMRL------RSTASQSIGDD 
EMLN--MPLIQNAMCVQSEMVRKCQPD-GQDKRGYTWMAKD-DTLCPQLHNQAEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC- SCNKVLKILVWQLCESV-DDWQTLTSDPCISAEPNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMGKIFALEW-EHDMSHRD -SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDIFILA---CPVASGGEQDAVT-ERSQHYI-VYAEL----QNDHKSPISASK HMNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDHSVMFI-ISDAQNRLICGSSIMCGLTFMATACNPHFAKLFMEEVCMRYDYAAN HDIKI------SEVYAEENSWENVRNF-EMMSGRAGAQINH-PQLPEQVV-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNVVMQY VFYYKENRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RCC-GPGDNGGMQTVQE DMFQVLVCV ; t36 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYVLQVRNALEDAL--YVFFTGTKGTRELVKY-YM-CMEGHGCVNTMAKKS-S QSGIWKFNAFMC-LMHLMEG--------GDL--------------------CSY--PSFLGMCMFHAMVQSKGD--LVLPDSPNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSEHIKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDG FFAK-D-E------DRPGIQNAVSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINIMSKPAMGEWVGNRCRNELTALRIIQLDVGVSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCGVMSVEKKEASPK-EFEVAADPTVIYFYRNLIIQKITDVLSAVRMESPQEMRT------QDVNSSSLGDN NGQA--MNIVPYWVCVASGVVRKTHKD-SVDKRGQTWTAKS-DFLCPLAVDQGEP---GAEQKPAVGEENPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLYADSCPIA-VNAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEGKVMLKIFGLDWCEVERSHEH -SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIRPGT---ARSLTILF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEH----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-IILHGSLFF-------AEPNIVRQPGVSHIFI-GNDARRVLICGKSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMMN YDIKIYETHWRMGVYALDNSWETLNVS-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIKIY--MWGTINFKNRVMQY CNYYKENRSVR-----------INNTGLAELPK---------------------YHHQNILID-ELYSNV---YPAAPKKH--QYM-GVGDVGGYEVICE NLFQILVVE ; t87 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYMLQVRNALEDAL--YVFFTGNKGTRELVKY-YM-CMEGHGCVNTMAKKS-S 
QSGIWKFNAFMC-LMHLMEG--------GDL--------------------CSY--PSFLGMCMFHAMVQSKGD--LVLPDSPNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSDHVKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDG FFAK-D-E------DRPGIQNAMSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINVMSKPAMGEWVGNRCRNDLTALRIIQLDVGHSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCLVMSVEKKEASGK-EFEVAADPTVIYFYRNLIIAKITDVLSAVRMESPQEMRE------QDVNSSSLGDN NGQA--MNIVPYWVCVGSGVVRKTHKD-SVDKRGQTWTAKS-DFLCPLAMDQGEI---GAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLYADSCPIA-INAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEGKVMLKIFGLDWCEVERSHEH -SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIRPGT---ARSLTILF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEH----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-VILHGTLFF-------AESNIVRQPGVSHIFI-GNDARRVLICGLSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMVN YDIKIYETHWRMGVYALDNSWETLNVS-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIKIY--MWGTICFKNRVMQY CNYYKENRSVR-----------INNTGLAELPK---------------------YHHQNILID-ELYSNV---YPAAPKKH--QYM-GVGDVGGYEMICE NLFQILVVE ; t89 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-S QSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNELTALRIVQLDVGHSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AAEQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLEWCEVERSHEH 
-SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTN YDIKIYETHWRVGVYAIDNSWETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQY CNYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE NLFQILVVE ; t39 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-S QSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNELTALRIVQLDVGHSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AAEQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLEWCEVERSHEH -SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTN YDIKIYETHWRVGVYAIDNSWETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQY CNYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE NLFQILVVE ; t8 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-S QSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PSFLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG 
FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHVNIMSKPAMGEWVGVRCRNELTALRIVQLDVGHSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLEWCEVERSHEH -SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-VYCEL----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAANQMRYDYVTN YDIKIYETHWRVGVYAIDNSWETLNVS-EMTSGRMGAKINHLPRLPEHVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQY CNYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNDI---YPAAPKKH--KFL-GVGDVGGYEIICE NLFQILVVE ; t6 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGVVNTMAKKS-S QSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSH--PSFLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNPLNVGDREDSDHTKEKHMRI----YA---NIGG-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDG FFAK-D-E------ERPGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKVVRKVGANAYDQVSRMKAAAPVYPHIMEYAHG M----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVGVRCRNELTALRIVQLDVGHSGKTLGQ -NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPTVIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDN NGQA--MNIVPYWVCVASGVVRKMHKD-SVEKRGQTWEAKS-DFLCPLAVDQGEP---AAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLEWCEVERSHER -SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCEV-ERSQCYN-IYCEL----KQDHKSAIIANK QEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGVSHIFI-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYMTN 
YDIKIYETHWRVGVYAIDNSWETLNVS-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQY CNYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KFL-GVGDVGGYEIICE NLFQILVVE ; t58 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGCVNTMAKKS-S QSGIWKFNAFMC-LMHLMQG--------GDL--------------------CSH--PSFLGMCMFHAMVKSKGN--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNHLNVGDREDSEHIKEKHMRV----YA---SIGG-H-IIIREWNDVMGRLNHIEPGAEVTFPLRR--RGQASKPV------------------IDG FFAK-D-E------DRPGIQNAMSVPCG----DQWVGSVRGWCSSQHRYGLAIHILVHQQSRLHKTYNIDRKNGANAYEQDSRMKAGAPVYPHIMEYAHG M----FNPFYGMSEPKNNAQGNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVGNRCRNELTALHVVQLDVGFSGKTLGQ -NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEASPK-EFEVAADPTVIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDN NGQA--MNIVPYWVCVVSGVVRKTHKD-SVEKRGQTWTAKS-DFLCPIAVNQGEP---GAEQKPAVGA-NPIVKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFADSCPTA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEGKMMLKIFGLDWCEVERSHEH -SSVDDNYN-VNMLPFPNSRQDPVGHYVQDLED-LLRLIRPGT---ARSLTVLF-YGCQYYSGQFQDCEI-ERSQLYN-VYCEL----KQDHKSAIMANK QEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGVSHIFV-GNDARRMLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMSN YDIKIYETHWRPGVYALDNSWETLNVS-EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPDIKIH--MWGTINFKNKVMQY CTYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYSNI---YPATPKKH--QYM-GVGDVGGYEVICE NLFQILVVE ; t54 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDAL--YNFFTGHKGTRELVKY-YM-CMEGEGCVHTMAKKS-S QSGIWKFNAFMC-LMHLMQG--------GDL--------------------CSH--PSFLGMCMFHAMVQSKGN--LVLPDSCNEDSVSFMYIQHHSFHE KRCLNHLNVGDREDSEHIKEKHMRI----YA---SIGG-H-IIIREWNEVMGRLNHIEVGAEVTFPLRR--RGQASKPV------------------IDG FFAK-D-E------DRPGIQNAMSVPCG----EQWVGSVRGWCSSQHRYGLAIHILVHQQSRLHKTYNIDRKNGANAYDQDSRMKAGAPVYPHIMEYAHG M----FVPFYGMSEPKNNAQGNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVGNRCRNELTALHIVQLDVGFSGKTLGQ 
-NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEASPK-EFEVAADPTVIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDN NGQA--MNIVPYWVCVVSGVVRKTHKD-SVEKRGQTWTAKS-DFLCPIAVNQGEP---GAEQKPAVGA-DPIVKPWQFVIMNTTTFAKNIFRLLYVKDV- SCNGVLPLLKWQLCDSH-GDWQSLFADSCPIA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEGKMMLKIFGLDWCEVERSHEH -SSVDDNYN-VNMLPFPNSRKDPVGHYVQDLED-LLRLIRPGT---ARSLTVLF-YGCQYYSGEFQDCEI-ERSQLYN-VYCEL----KQDHKSAIMANK QEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGVSHIFV-GNDARRMLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMTN YDIKIYETHWRPGVYALDNSWETLNVS-EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPDIKIH--MWGTINFKNKVMQY CTYYKENRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYSNI---YPATPKKH--QYM-GVGDVGGYEVICE NLFQILVVE ; t65 MIWSEYRQMVREG--NDQSIDNK-C------------LIADRVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PSFLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRK--GGQDSKPF------------------IDG FF------------DRPGIANAMSVKCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYEHKHAAMEKLMHH-SLYLMRVNTMSKPVMGDWVGNRCRAELTALQTVQLDVGSSGKTLGQ -NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDD DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPAAVNEGEH---GAEQRP-FGTQNPVCKPWRFVIMNTQTFAKNIIRLLYIKDI- SCNQVLQLLDWQLCDSH-GDWQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMTMKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDYKSAIIANQ QEHKGMEWDTGKE-MQQGPK-VVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-GNDARRMLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYMWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDV---YPAAQKKY--QVM-GVGDIGGYELICE DLFQILVCQ ; t29 
MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNC--PSFLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLFVGDSQDSQHQREKQLRI----YG---RIGK-V-VIIKVWNAAMGRLNHVEPPAEVMFPLRR--GGSDSKPF------------------LDG FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHG M----VKPFYGQSEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMPKPVMGDWDGNRCRAELTALQTVQLDVGSSGKTLGQ -NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDD DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTHNPVCKPWRFVIMHTQTFSKNIIRLLYIKDI- SCNQVLQLLDWQLCDSH-GDWQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMSQKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQELEDSALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDYKSAIIANQ QDHKGMEWDTGKE-MQQGPK-VVVHLALFYMPSNVLAAEPNIIGQPQVSHWFV-GNDARRMLIVGVSMMPAYKFMREACVPFFRKLFMADNQVRYDYMWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDL---YPAAPKKY--QEM-GVGDIGGYELIVE DLFQILVCE ; t98 MIWSEHRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTKELVKF-YF-CMEGCGCVNTMAKKS-S QSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PPFLGMCMFHPQVQMKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHNPSRLHRTYAINKKMGANVYQQSSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALQTVQLDVGSSEKTLGQ -NIVTSKRLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD---IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSASLGDD DPQM--MKIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNTQTFSKNIIRLLYIKDI- 
SCNQVLQLLDWQLCDSH-GDWQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMTMKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSEMFERSQLYR-VYCEI----KKDYKSAIIANQ QEHKGMEWDTGKE-MQQGPK-IVVHLGLFYMPSNVLAAEPNIIGQPQVSHVFV-ANDARRMLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCDLTVSGLKRTV-AGPPDETEITKSPVIHIV--MWADIVFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNLLTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELIVE DLFQILVCE ; t83 MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTKELIKF-YF-CMEGCGTVNTMAKDS-S QSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PSFLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSCSKPF------------------VDG FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHNPSRLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALQTVQLDLGSSGKTLGQ -NIVTSKKLGDRTWLAASPLQ------IGCGIMVQEKKIASPQ-EVEVAAD---IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSISLGDD DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTQNPVCRPWRFVIMNMQTFSKNIIRLLYIKDI- SCNQVLQLLDWQLCDSH-GDWQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMTMKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSDMFERSQLYR-VYCEI----KKDYKSAIIANQ QEHKGMEWDTGKE-MQQGPQ-IEVHLALFYMPSNVLPAEPNIIGQPQVSHVFV-GNDARRMLIVGVSLMPAYKFMREACVPFFRKLFMAENQVRYDYMWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTI-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNLLTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELIVE DLFQILVCE ; t95 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQL--------GDL--------------------CNP--PSFLGMCMFHPQVRSKGE--LVLPDNPNENSVEFMYRRHHSLSD 
KRCLNLLYTGDSQDSQHLREKQLRI----YG---RIGR-V-VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKPV------------------YDG FF------------DRPAIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKVGANVYQQTSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGHRCRAELTELQTVQLDVGSSGKTLGQ -NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-QVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRCRKLVCIQKVDSPSLGDD DPQM--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGGQNPACKPWHFVIMNRQTFAKNMIRLLYIKDI- SCNEVLQLLDWQLCDSH-GDWQSLIADSCPNA-SNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLDWCEVEKSHEK -SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALIILF-YGCQYYSGKFQDSELFERSQLYR-MYCEI----KKDHKSAIIANQ QEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQVSHVFV-GNDARRMLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWN YDIKIYETHYRMGVYAVDNSWETLVYC-EMTSGRIGAKVNHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPIIHII--MWEEIMFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELICE DLFQILVCE ; t20 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQL--------GDL--------------------CNP--PSFLGMCMFHPQVRSKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDSQDSQHLREKQLRI----YG---RIGR-V-VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKAV------------------YDG FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKVGANVYQQMSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGHRCRAELTELQTVQLDVGSSGKTLGQ -NIVTSKKLGDRTWLAAPPLQ------IGCDVMVQEKKIASPQ-QVEVAAD---IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPKLVCIQKVDSPSLGDD DPQC--MNIVPYWVCVCSGVVKKAEPD-SVDKRGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGGQNPACQPWHFVIMNTQTFAKNMIRLLYIKDI- SCNQVLQLLDWQLCDSH-GDWQSLIADSCPNA-VNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLDWCEVEKSHEK -SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALIILF-YGCQYYSGKFQDSELFERSQLYR-MYCEI----KKDHKSAIIANQ 
QEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQVSHVFV-GDDARRMLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWN YDIKIYETHYRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPIIHII--MWEDIMFKNCVMQY CTYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QVM-GVGDIGGYELICE DLFQILVCE ; t69 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGHKGTRELVKY-YH-CMEGCGCVNTMAKSS-S QSAPWKFNRFMC-LMHLMQE--------GDL--------------------CNS--PGFLGMCMFHPQVQRKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V-VIIKVVNAVMGRLNHVEPSAEVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALKTGQLDVGSSGKTLGQ -NIMTSKKLGDRTWLAANPLQ------IGRGVMVWEKKVASPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNTETFAKNIIRLLYVKDV- SCNQVLQLLDWQLCDSH-GDWQSLVADSCPNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMIVKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDGRRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYAWN YNIKIYETHFRDGVYAVDNSWETLVYV-EMASGRIGAKINHQPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQY CNYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE DLFQILVCE ; t2 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELVKY-YH-CMEGCGCVNTMAKSS-S QSAPWKFNRFMC-LMYLMQD--------GDL--------------------CNS--PGFLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V-VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHG 
M----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALKTGQLDVGSSGKTLGQ -NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVPSPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV- SCNQVLQLLDWQLCDSH-GDWQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMMVKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDARRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWN YNIKIYETHFRVGVYAVDNSWETLVYV-EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQY CNYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE DLFQILVCE ; t10 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELVKY-YH-CMEGCGCVNTMAKSS-S QSAPWKFNRFMC-LMYLMQD--------GDL--------------------CNS--PGFLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRM----YG---EIGK-V-VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIMEYAHG M----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALKTGQLDVGSSGKTLGQ -NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVPSPQ-EVEVAAD---IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDD VPQM--MNIVPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV- SCNQVLQLLDWQLCDSH-GDWQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMMVKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHNSAIIANQ QEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQVSVVFV-GNDARRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWN YNIKIYETHFRVGVYAVDNSWETLVYV-EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQY 
CNYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QIM-GVGDIGGYELICE DLFQILVCE ; t31 MIWSEYRSMVREG--ADQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKQS-S QSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNL--PSFLGMCMFHNQVQCKGE--LVLPDNVNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRI----YQ---NIGK-V-VIIKVWNANMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMNVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPIYPHIMEYAHG M----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVGNRCRAELTALKTVQLDVGSNGKTLGK -NIMTPKKLGDRTWLAASPLQ------IGCGVMVQEKKVASPQ-EVE-AAD---IFFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQYVDSPSLGDD TPQM--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKA-DFLCPPAVSEGEH---GAEQRPVFGMQNPACKPWRFVIMNTQTFAKNIIRLLYIKDV- SCNAVLQLLDWQLCYSH-GDWQSLIADSCPTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLDWCEVEKSHEK -SSVDDNYH-VNMLPFAQSNRDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ HEVKGMDWNTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQVSHVFV-GNDARRMLIVGVSVMPSYKFMREACVPFFHKLFMADNQVRHEYMWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGANINHLPRLPEQVV-PPCILTVSGLKRTI-AGSPDETEITKTLIYHIV--MWADIMFKNCVMQY STYYKENRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRVM-GVGDIGGYEMICE DLFQILVCE ; t15 MIWSEYRSMVREG--SDQSIDNH-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PSFLGMCMFHNQVPNKGA--LVLPDNVNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRI----YP---SIGK-V-VIIKVWNAAMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKMGANVYQQTSRMKAAAPLYPHIMEYAHG M----VEPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVLGDWVGNRCRAELTALKTVQLDIGNSGKTLGQ -NIMTPKRLGERTWLAASPLQ------IGCGVVVQEKKVASPQ-EVEVAAD---IYFYRNMVIQTLTDVVAAVRMQSPQEMRPPVLVCIQDVDSVSLGDD 
TPQM--MNIAPYWVCVCSGVVKKTQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GAEQRPVFGMRNPICKPWRFVIMNTQTFAKNIIRLLYVKDV- SCNAVLQLLDWQLCYSH-GDWQSLIADSCSTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIVKIFHLDWCEVEKSHEK -SSVDDNYH-VNMLPFAQSNGDPVGHYVQGLED-ALRLIRPGT---ARALVILS-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIVANQ QEVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQVSHVFV-GNDARRMLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYMWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQY STYYKENRSVR-----------GNNTGGAELPK---------------------YHQQNILTD-ELYSDV---YPTAPKKYVIRVM-GVGDIGGYELICE DLFQILVCQ ; t63 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDAL--YTFFTGQKGKRELIKY-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQD--------GDL--------------------CNS--PSFLGMCMFHNQVSFKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDEQDSSHLREKQLRL----YA---RIGK-V-VIIKVWNAVMGRLNHHEPPAAVMFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWMGIIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAELTALKTVQLDVGSSGKTLGQ -NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD TPQK--MNIAPYWVCVCSGVVKKAQLD-SVEKRGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGVQNPVCKPWRFVIMNTQTFAANIIRLLYVKDV- SCNAVLQLLDWQLCYSH-EDWQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMFMKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ QGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDARRMLIVGQSVMPPYKFMREACVPFFHKLFMAENQVRHDYVWN YDIKIYETHFRMGVYAIDNSWETLVYC-EMTSGRIGAKINHQPRLPEQVV-PPCVLTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQY STYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE DLFQILVCE ; t50 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-S 
QSAPWKFNHFMC-LMHLMQN--------GDL--------------------CNS--PSFLGMCMFHNQVSVKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSSHLREKQLRL----YA---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRK--CGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHVLVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAELTALKTVQLDVGSSGKTLGQ -NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD TPQK--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGVQNPVCKPWRFVIMNTQT-AKNIIRLLYVKDV- SCNAVLQLLDWQLCYSH-GDWQSLIADACSTA-VNIAVCCWKRELMPGL---NHSVEHLAKSVYFKPDGE-------DEGQMIMKIFNLDWCEVEKSHEK -SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIMANQ QGVKGMDWDTGKE-MQQGPK-IVMHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDARRMLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQY STYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE DLFQILVCE ; t25 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PSFLGMCMFVNQVSFKGE--LVLPDNPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSSHLREKQLRL----YA---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVGSRCRAELTALKIVQLDVGSSGKTLGQ -NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDD TPQK--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGMQNPVCKPWRFVIMNTQTFAKNIIRLLYVKDV- SCNAVLQLLDWQLCYSH-GDWQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMIMKIFNLDWCEVEKSHEK 
-SSVDDNYD-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSELFERSQLYR-VYCEI----KKDHKSTIIANQ QGVKGMDWDTGKE-MQQGPK-IVVHLTLFYAPSNVLAAEPNIFGQPQVSHVFV-DNDARRMLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWN YDIKIYETHFRMGVYAVDNSWEALVYC-EMTSGRTGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQY STYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQM-GMGDIGGYELICE DLFQILVCE ; t51 MIWSEYRSMVREG--SDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY-YF-CMEGCGCVNTMAKNS-S QSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PSFLGMCMFHVQVQFKGE--LVLPDHPNENSVEFMYRRHHSLSD KRCLNLLYVGDNQDSQHLREKQLRI----YA---RIGK-V-VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDG FF------------DRPGIANAMSVPCG----DQWVGVIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIMEYAHG M----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPIMGDWVGNRCRADLTALKTVQLDVGSSGKTLGQ -NIMTPKNLGDKTWLAASPLQ------IGCGIMVQEKKVASPQ-EVEVAAD---IYFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQDVDSPSLGDD TPQM--MNIAPYWVCVCSGVVKKAQPD-SVDKRGKTWVAKS-DFLCPPAISEGEH---GAEQRPVFGMQNPVCRPWRFVIMNTQTFAKNVIRLLYVKDV- SCNKVLQLLDWQLCYSH-GDWQSLIADACPAA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIMKIFGLDWCEVEKSHEE -SSVDDNYN-VNMLPFVSSNGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQFYSGKFQDSELFERSQLYR-VYCEI----KKDHKSAIIANQ QGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQVSHVFV-DNDIRRMLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWN YDIKIYETHFRMGVYAVDNSWETLVYC-EMTSGRIGAKINHLPRLPEQVF-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQY ATYYKENRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIREM-GVGDIGGYELICE DLFQILVCE ; t9 MIWSECRQMVREK--TDNAIDNQ-C------------LIAARVKENSGHVKQVRNDSEDIL--YNHFTGKKGARELIKR-YV-CMEGHDCVNTVANVS-S QSAIWKFDRFMCDLMHLMQN--------GDL--------------------CSY--PSFLPMCMFVPQVLSKGE--LVLPDSSNEESISFVYLQHHSFSV KRCLNYLNVGDSADSKHLKEKHLRA----EGMPGNIGK-H-VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDG 
FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDPSSLHRTFAINRKMGANVYQQTSRMKASAPFYPHCMEYAHG V----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYENKHATMTKLMHH-SLYLMRMHTMSKPIMGDVNGNRCRADLTALKMLQLDIGFSAKTLGQ -NVVVPHLLGHRTWLATSPLQ------IGCGVMIFGNKIGSAN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD DNQA--MVILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKV-DFLCPPAFNQGEH---RAEQKPASKFEHPVCGPWEFVIVNYQTSAKDIILLLYIKDV- GCNCVLELLNWQLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMVLKIFGLDWCEMEKSHQR -SGVDDNYQ-VNMLPFNHSKNNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEVYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIIAHK YEHKGMDWDTGKE-AQQGPE-VVHHNLLFF-------AEPNIHGQPGVGHIFV-GNDARRKLIAGVSFMSMVKFMCEACVPFFRKLFMAVGQMRYDYVNN YDIKIWETVFRGGVYAIENSWETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGLIDGTEITKTPDIGIC--MWETIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAALPK---------------------YHHQNTLVD-ELYSDV---YPAAPKKK--VCM-GVGDVGGYEVMGE DLFQILVCE ; t47 MIWSECRQMVREK--SDNAIDNQ-C------------QIAARVKRNSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-S QSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSY--PSFLPMCMFVSQVLSKGE--LVLPDSSNEESISFVYLQHHSFSD KRCLNYLNVGDNSDSKHLKEKHLRA----QGMPGNIGK-H-VIINVWNAVMGRLNHHESPAEVVFPLRQ--GGPDSKPM------------------IDG FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDPSNLHRTFAINRKMGANVYQQASRMKASAPFYHHCMEYAHG V----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESRDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGHVNGNRCRADLTALKMLQLDIGFSAKTLCQ -NIVVAHLLGHRTWLATSPLQ------IGCMVMIFGNKIGSAN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRE------QDVDSHSLGDD DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKM-DFLCPPAFNQGEH---RAEQKPALKFKHPVCGPWEFVIVNYQTSAKDIILLLYIKDV- GCNCVLELLKWQLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCDRLAKHIYFQPDGE-------DEGKMILKIFGLDWCEMEKSHQR -SGVDDNVQ-VNMLPFNHSKHNPVGHYVQGLED-ELNLIRPGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMACK YRHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGVGHIFV-GNDARRKLIAGLSFMAMMKFMCEACVPFFRKLFMAVGHMRYDYVSN 
YDIKIWETVFRGGVYAIENSWETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGIIDGTEITKTPEIGIC--MWDTIEFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNTLVD-ELYSDI---YPAAPKKK--VCM-GVGDVGGYEVMGE DLFQILVCE ; t60 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-S QSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSV--PSFLPMCMFVSQVLHKGE--LVLPDSSNEESISFVYLQHHSFSD KRCLNYLNVGDNADSKHLKEKHLRA----QGMPGNIGK-H-VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDG FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHNPSNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCMEYAHG V----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYESKHAAMTKLMHQ-SLYLMRIHTMSKPIMGDVNGNRCRADLTALKMLQLDIGFSAKTLGQ -NIVVPHCLGHRTWLATSPLQ------IGCMVMIFGNKIGSEN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD DNQA--MNILPYVVCVCSGVVRKDHPDTSIHKRGRTWLAKV-DFLCPPAFNQGEH---RAEQKPAFKFKHPVCGPWEFVIVNYQTEAKDIILLLYIKDV- GCNCVLELLEWQLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFGLDWCEMEKSHQR -SGVDDNYQ-VNMLPFNHSKHNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMAYK FEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGVGHIFV-GNDARRKLIAGVSFMAVMKFMCEACVPFFRKLFMAVGQMRYDYMNN YDIKIWETFFRGGVYAIENSWETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIEGTEITKTPDIGIC--MWDTIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNTLVD-ELYSDI---YPAAPKKK--VCM-GVGEVGGYEVMGE DLFQILVCE ; t30 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-S QSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSV--PSFLPMCMFVSQVLHKGE--LVLPDSSNEESISFVYLQHHSFSE KRCLNYLNVGDNADSRHLKEKHLRA----QGMPGNIGK-H-VIISVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------MDG FFDK-D-D------DRPGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHTPSNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCMEYAHG V----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGDVNGNRCRADLTALKMLQLDIGFSAKTLGQ 
-NIIVPHCLGHRTWLATSPLQ------IGCMVMIVGNKIGSEN-EFEAAADPTVIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDD DNQA--MNILPYVVCVCSGVVRKEHPDTSISKRGRTWLAKI-DFLCPPAFNQGEH---RAEQKPAFKFKHPCCGPWEFVIVNYQTSAKDIILLLYIKDV- GCNCVLELLKWQLCTSH-GDWQSLVADSCVWA-HNVAVCEWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFGLDWCEMEKSHQR -SGVDDNYQ-VNMLPFKHSKHNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEYYSGEFQDSES-ERSWIYN-VYCEI----KKDHKSAIMAYK YEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIHGQPGVGHIFV-GNDARRHLIAGVSFMAVVKFMCEACVPFFRKLFMAVGQMRYDYVNN YDIKIWETVFRGGVYAIENSWETLVLC-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIDGTEITKTPDIGIC--MWDTIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHDQNTLVD-ELYSDV---YPAAPKKK--VCM-GVGDVGGYEVMGE DLFQILVCE ; t70 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDIM--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-S QSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLPDSSTEESISFMYLQHHSFSD KRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIGQ-H-VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDG FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDPSKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVMEYAHG V----CKPFYEHSEPKNEAQGNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQDSLYLMRMHTMSKPLMGDVNGNRCRADLTSLKMLQLDIGFCAKTLGQ -NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIGSTQ-EFEAAADPTVIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDD DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV- GCNCVLELLAWQLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMILKIFGLDWCEVERSHQR -SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCQYCSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAILAHK YKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRVGHIFV-GNDAHRKLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNN YDIKIWETAFRGGVYAIENSWETLVLC-EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPEIGIC--MWDTIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HYM-GVGDVGGYEVMGE DLFQILVCE ; t91 
MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDIM--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMANVS-S QSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLPDSSTEESISFMYLQHHSFSD KRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIGQ-H-VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDG FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDPSKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVMEYAHG V----CKPFYEHSEPKNEAQGNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMHTMSKPLMGDVNGNRCRADLTSLKMLQLDIGFCAKTLGQ -NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIGSTQ-EFEAAADPTVIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDD DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV- GCNCVLELLAWQLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMILKIFGLDWCEVERSHQR -SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCQYCSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAILAHK YKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRVGHIFV-GNDAHRKLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNN YDIKIWETAFRGGVYAIENSWETLVLC-EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPEIGIC--MWDTIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HYM-GVGDVGGYEVMGE DLFQILVCE ; t49 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKENSGHVKQVRNASEDIM--YNHFTGNKGACELIKG-YV-CMEGHDCVNTMANIS-S QSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PSFLPMCMFVAQVIAKGE--LVLPDSSNEESISFMYLQHHSFSD KRCLNCLNVGDNEDSKHLKEKFLRA----DG---GIGQ-H-VIINMWNAVMGRLNHHEPPPEVVYPLRQ--GGPNSKPM------------------VDG FFDK-D-D------DRPGIANAMNVPCG----DQWGGPIRGWCSSQHRFGLAFHTLKHDPSKLHRTFAINEKMGANVYQQNSRMKASAPFYPHVMEYAHG V----CKPFYEHSEPKNDAQGNGDKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMYTMSKPIMGDVNGNRCRAELTSLKMLQLDIGFSAKTLGQ -NIVPPKLLGHRTWLATSPLQ------IGCGVMIFGNKIGSTQ-EFESAADPTVIYFYRNRIVKRMADVISTVRMKSPQEMRG------QDVDSHSLGDD DNQA--MNILPYWVCVCSGVVRKEHPDTSINKRGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTMAKDIILLLYIKDV- 
GCNCVLELLAWQLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMIIKIFGLDWCEVEKSHQR -SGVDDNYK-VNMLPFSHSNHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCEYYSGEFQDSEE-ERSWIYN-VYCEI----KKDHKSAIMAHK YEHKGMDWDTGKE-MEQGPK-VVHHNILFY-------AEPNIHGQPRVGHIFV-GNDAVRNLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNN YDIKIWETAFRGGVYAMENSWETLVLC-EMTSGRVGARMNHLPRLFEQVC-PPCILTVSGLKETV-AGVIDGTEITKTPEIGIC--MWDTIHFKNPVMQY CCYYKEPRSVR-----------VNNTGGAELPK---------------------YHNFNALVD-ELYSDV---YPAAPKKK--HYM-GVGDVGGYEVMGE DLFQILVCE ; t52 MIWSEERQMVREK--VDNAVDNQ-C------------LIAARVKENCGHVKQVRNASEDIL--YNHFTGKKGARELIKR-YI-CMEGHDCVNTMAHDS-S QSATWKFDRFMCDLMHLMQG--------GDL--------------------CSY--PSFLPVCMFVAQVINKGE--LVLPDSSNEESISFMYLQHHSFSE KRCLNYLNVGDNEDSKHLKEKFLRA----DG---NIGQ-H-VIINMWNAVMGRLNHHEPPAEVNFPLRQ--GGPDSKPM------------------VDG FFDK-D-D------DSPGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLASHTLSHDPSKLHRTFGINKNMGANVYQQTSRMKANAPFYPHVMEYAHG V----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIECQYENKHATMAKLMHD-SLYLMRMHTMSKPVMGDVNGHRCRADLTALKMLQLDIGFSAKTLGQ -NIVAPKLLGVRTWLATSPLQ------IGCGVMIFGNKIGSTQ-EFEAAADPTVIYFYRNRIIRRVADVISTVRMKSPQEMRE------QDVDSHSLGDD DNQA--MTILPHWVCVVSGVVRKEHPDTSINKRGSTWLAKV-DFLCPPAFNQGEH---HAEQKPVFGFKHPHVGPWEFVICNYQTSAHDIILLLYIKDV- GCNCVLELLVWQLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFELNWCEVEKSHQH -SGVDDNYQ-VNMLPFDHCKHNPVGHYVQGLED-ALGLIRPGT---ARALNILF-YGCQYYSGEFQDSEA-ERSWIYN-VYCEI----KKDHKSGIMAHK YDHKGVDWDTGKE-MEQGPK-VLHHNLLFY-------AEPNIHGQPTVGHIFV-GNDARRKLIAGVSFMSVVKFMCEACVPFFRKLFMAFRQMRYDYVNN YDIKIWETVFRGGVYAIENSWETLILC-EMTSGRCGAKMNHMPRLFEQVC-PPCILTVSGLKETM-AGVIDGTEITKTPDIGIG--MWDSIHFKNPVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNSLVD-ELYSDV---YPAAPKKK--HYM-GVGDVGGYEVMGE DLFQILVCE ; t43 MIWSEVRQMVREG--SDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCALMHLMQK--------GDL--------------------CSH--PSFLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSD 
KRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGH-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSK-M------------------VDG FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVNFGLANHTLVQ-PSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKVVQLDIGFSSKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGVDVMVMGNKIGSPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKSPQEMRH------QDVDSPSLGDD DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---EREQRPVYGFKHPGCRPWQFVIANYQTSAKNIIMLLYVKDV- SCNGVLQLLNWHLCDSH-GDWQSLVADSCCWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFLHSRQNPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVIAHK HEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYNVN YDIKIWETHIRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCVLTVSGLKQPM-AGYNDQTEITKTPDICIC--TWGTIHFKNFVMQY CVYYKENRSVR-----------VNNTGGAELPQ---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t1 MIWSEVRQMMREG--TDNAIDNQ-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCCLMHLMQN--------GDL--------------------CSH--PSFLGMCMFVEQVRCKGE--LVLPDSPNEESLSFVYIQHHSFSD KRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGR-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPV------------------VDG FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAKLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGRHTWLATSPLQ------IGCDMMVMGNKIGSPS-EFEVAADPTIIWFYRDCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD DQQE--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---EAEQRPVYGFVHPRCRPWQFVIANYQTSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFMQSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK 
VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLITGVSVMPTYKFMCGACVQFFHKLFMAVRNMRYDYTVN YDIKIWETHIRQGVYAVENSWETLITC-EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKQPM-AGYNDETQITKTPDICIC--TWGTIHFKNSVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t86 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKPVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCALMHLMQN--------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIGR-H-VIINVWNAFMGRLNHHEPPAEAVYPLRK--GGPDSKPM------------------VDG FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKMGANVYHQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---GAEQRPVYGFQHPRCRPWQFVIANYQTFAKNIIMLLYVKDV- SCNRVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFIHSRDNPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYP-VYCEM----KKDHKSAVVAHK AEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVN YDIKIWETHIRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETQITKTPDICIC--GVGTIHFKNLVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t72 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKVS-S QSAVWKFDRFMCALMHLMQN--------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNCLNAGDNDDSEHLKEKHLRT----YG---QIGR-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPM------------------VDG FFDK-E-D------HRPGIANAVSVPCA----DQVGGPLRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKIGANVYQQTSRMKADAPVYPHIMEYAHG 
V----YKPFYERSEPKNEAQGNGEK-QGNVPCVESVECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPS-EFEVAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRH------QDVDSPSLGDD DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFKQGEH---EAEQRPVYGFQHPRCRPWQFVIANYQSSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGQMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFIHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLITGFSIMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVN YDIKIWETHIRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPV-AGYNDETQITKTPDICIC--TWGTIHFKNSVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t96 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCALMHLMQV--------GDL--------------------CSH--PSFLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNCLNVGDNDDSEHLKEKHLRV----YG---QIGR-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKLM------------------VDG FFDK-E-D------ERPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKIGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDVMVIGNKIGSPS-EFEIAADPTIIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDD DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFNQGEH---EAEQRPVYGFQHPRCRPWQFVIGNYQTSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCMWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFIRSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDAEA-ERSWIYD-VYCEM----KKDHKSAVYAHK VEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLISGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVN YDVKIWETHIRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNMETQITKTPDICIC--TWGTIHFKNSVMQY 
CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDV---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHG ; t44 MIWSEVRQMVREG--TDNAIDTR-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFSGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSH--PSFLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNYLNVGDNDDSEHLKEKHLRI----YG---VIGK-H-VIINVWNAFMGRLNHHEPPADVEYPLRK--GGPDSKPM------------------VDG FFDK-E-D------DRPGIANAVSVPCG----DQVGGPIRGWCSSQVKFGLANHTLAQNPSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYDRSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGYVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDIMVMGNKIGSPS-EFEVAADPTIIWFYRNCIVHKLADVVSTAKMKSPQEMRV------QDVDSPSLGDD DQQA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKN-DFLCPPAFEQGEH---EAEQCPVHGFKHPRCRPWQFVIADYQTSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFMHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDCEA-ERSWIYD-VYCEM----KKDHKSAVVAHK HEHKGMDWDTGKE-MNQGPK-VVMHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLITGVSVMPTYKFMCNACVPFFHKLFMAVRNMRYDYTVN YDIKIWETHMRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETEITKTPDICIC--TWGTIHFKNSVMQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNMLVD-ELYSDM---YPAAPKKK--VYV-GAGDVGGYEVMSE DLFQILVHA ; t45 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YK-CMEGHDCVNTMAKSS-S QSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSV--PSFLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNYLNVGDNDDSEHLKEKHLRI----YG---EIGK-H-VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDG FFDK-E-D------DRPGIANAVSVPCT----DQVGGPIRGWCSSQVKFGLANHTLVHNPSKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPA-EFEVAADPTIIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDD 
DQEA--MDILPYWVCVMSGVVRKEQPD-SVNKRGRTWLAKK-DFLVPPAFNQGEH---EAEQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVHKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCQYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK HQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVN YDIKIWETHLRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPDICIC--TWGTIHFKNSVVQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t74 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHDCVNTMAKPS-S QSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSV--PSFLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSD KRCLNYLNVGDNDDSEHIKEKHLRI----YG---EIGK-H-VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDG FFDK-E-D------DRPGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVHNPSKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIMEYAHG V----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSAKTLGQ LNIIDCKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPA-GFEVAADPTIIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDD DQEA--MDILPYWVCVTSGVVRKEQPD-SVNKRGRTWLAKK-DFLVPPAFNQGEH---EAEQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVHKIFELDWVEMEKSHQQ -SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCQYYSGQFPDGEA-ERSWIYD-VYCEM----KKDHKSAVVAHK HQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGVSHVFM-GNDATRKLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVN YDIKIWETHLRHGVYAVENSWETLVTC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPDICIC--TWGTIHFKNSVVQY CCYYKENRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HYV-GAGDVGGYEVMSE DLFQILVHE ; t26 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDAL--YQNFTGKKGAREKIKR-YL-CMEGHECVVTMAKNS-S 
ISAMWKFDRFMCDLMHLMQN--------GDL--------------------CSF--PSFLGMCMFHDQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSD KRCLNYLNVGDNEDSEHLKEKHLRV----YG---NIGK-H-VIINSWNAMMGRLNHHEPAAEVVYPLRK--GGPESKPM------------------VDG FFDK-E-D------DRPGIANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLIHIPSKLHRTFAINKKMGANVYQQTSRMKAAAPVYSHVMEYAHK V----VKPFYARSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLMRMQTMAKPPMGDVDGHRCRAGLTALKIVQLDIGFSQKTLGQ -NIIDSKHLGHRTWLATSPLQ------IGCSAMVMGNKIGSPQ-EVELAADPTVIYFYRNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD DQQP--MNIIPYWVCVTSGVVHKEQPD-SVNKRGRTWTAKN-DFLCPDAFNQGEH---GAEQKPTHGFEHPRCRPWQFVIPVYQTGAKNIILLLYIKDV- SVNGVLQLLIWQLCDSH-GDWQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLDWCEIEKSHQQ -SSVDDNYL-VNMLPFTHSRETPVGHYVQGLED-PMHLMRPGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKNAIVAVK HEHKGMDWDTGKE-MNHGPK-VIVHGVLFH-------AEPNIGGQPGVSHVFV-GNDATRKLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYASN YDIKIVETHLREGVYAVENSWETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQSV-SGYNDETEITKTPDIKIC--TWGTIHFKNSVMQY CCYYKENRSVR-----------LNNTGGAELPK---------------------YHHQNVLVD-ELYGEA---YPAAPKKK--HYM-GAGDVGGYEVMSE DLFQILVHE ; t61 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHECVNTMAKNS-S VSAVWKFDQFMCDLMHLMQN--------GDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLSDSKNEESVSFVYIQHPSFSD KRCLNYLNVGDNEDSEHLKEKVLRV----YG---DIGK-H-VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDG FFDK-E-D------DRPGVANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHIPSKLHRTFAITKKMGANVYQQTSRMKATAPVYPHVMEYAHG V----VKPFYDRSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDGHRCRAGLTALKIVQLDIGFSSKTLGQ -NIIHSKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPQ-EVELAADPSVIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD DQQP--MNIIPYWVCVTSGVVRKEQPD-SVNKRGHTWTAKN-DFLCPPAYNQGEH---GAEQKPVHGFEHPRCRPWQFVISVYRTGAKNINLLLYIKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEGKMVLKIFGLDWCEIEKSHQQ 
-SSVDDNYL-VNMLPFAHSRQTPVGHYVQGLED-PMHLMRPGT---ARALNILF-YGCEYYSGHFPDGEA-ERSWIYD-MYCEL----KKDHKSAIVAVK HCHKGMDWDTGKE-MNHGPK-VIIHGLLFH-------AEPNIGGQPGVSHVFV-GNDATRKLIAGASVMFIVKFMCGACVPFFRKLFMALRNMRYDYSQN YDIKIVETHLREGVYAIENSWETLVVC-EMTSGRMGAKINHLPRLPEQVI-PPCILTVSGLKRAV-NGYNDETEITKTPDIKIC--TWGTIHFKNSVMQY CCYYKENRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSEL---YPAAPKKA--HYM-GAGDIGGYEVMSE DLFQILVHE ; t97 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHMKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGHECVNTMAKNS-S VSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSF--PSFLGMCMFHQQVRCKGE--LVLPDSKNEESVSFVYIQHHSFND KRCLNYLNVGDNEDSEHLEEKHLRV----YG---NIGK-H-VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDG FFDK-E-D------DRPGVANAMSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHVPSKLHRTFPINKKMGANVYQQTSRMKATAPVYPHVMEYAHG V----IKPFYDRSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDGHRCRAGLTALQIVQLDIGFSAKTLGQ -NIIHSKLLGHRTWLATSPLQ------IGCDVVVMGNKIGSPQ-EVELAADPSVIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDD DQQP--MNIIPYWVCVTSGVVRKEQPD-SVNKRGRTWSAKN-DFLCPPAFNQGEH---GAEQKPVHEFQHPRCRPWQFVISVYRTGAKNINLLLYIKDV- SCNGVLQLLNWQLCDSH-GDWQSLVADSCAWA-VNLAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEGKMNLKIFGLDWCEIEKSHQQ -SSVDDNYL-VNMLPFTHSRETPVGHYVQDLED-PMHLMRPGT---ARALNILF-YGCEYYSGHFPDGEP-ERSWIYD-MYCEL----KKDHKSAIVAVK HCHKGMDWDTGKE-MNHGPK-VIIHGVLFH-------AEPNIGGQPGVSHVFV-GNDATRKLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYSQN YDIKIVETHLREGVYAVENSWETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQAV-NDCNDETEITKTPDIKIC--TWPTIHFKNGMMQY CCYYKENRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSEV---YPAAPKKA--FYM-GAGDIGGYEVMSE DLFQILVHK ; t17 MIWSEVRQMVREG--ADNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGQDCVNTMAKSS-S QSAVWKFDRFMCDLVHLMQN---GARGSGDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSD KRCLNELNVGDNEDSIHLKEKHLRI----YG---NIGK-V-VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDG 
FFDK-E-D------DRPGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVPSLLHRTFAINRKMGANVYQQTSRMKAAAPVYPHMMEYAHG V----VKPFYERSEPKNEAQGNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDGVRCRAGLTALKIVQLDIGFSAKTLGQ -NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPQ-EFELAADPTVIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDD NQQS--MNIIPYWVCVHSGVVQKEQPD-SVHKRGRTWTAKN-DFLCPPAFDQGEH---GAEQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLDWCEMEKAHQQ -SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIRPGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKSAIVAVK HEHKGMDWDTGKE-MNQGPK-VIIHGVLFH-------AEVNIHGQPGVSHVFV-GNDATRKLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSN YDIKIIETHLRNGVYAVENSWETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPDIKIC--TWGTIHFKNFVMQY CVYYKENRSVR-----------FNNTGGAELPK---------------------YHHQNALVD-ELYSDV---YPAAPKKK--HYM-GEGDVGGYEVMSE DLFQILVHA ; t11 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDAL--YQNFTGQKGAREKIKR-YL-CMEGQDCVNTMAKSS-S QSAVWKFDRFMCDLVHLMQN---GARGSGDL--------------------CSF--PSFLGMCMFHEQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSD KRCLNSLNVGDNEDSIHLKEKHLRI----YG---DIGK-H-VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDG FFDK-E-D------DRPGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVPSLLHRTFAINKKMGANVYQQTSRMKAAAPVYPHMMEYAHG V----VKPFYERSEPKNEAQGNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDGVRCRAGLTALKIVQLDIGFSAKTLGQ -NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPQ-EFELAADPTVIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDD NQQS--MNIIPYWVCVHSGVVQKEQPD-SVHKRGRTWTAKN-DFLCPPAFDQGEH---GAEQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV- SCNGVLQLLNWQLCDSH-GDWQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLDWCEMEKAHQQ -SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIRPGT---ARALNILF-YGCEYYSGQFPDGEP-ERSWIYD-VYCEL----KKDHKSAIVAVK HEHKGMDWDTGKE-MNQGPK-VIIHGILFH-------AEVNIHGQPGVSHVFV-GNDATRKLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSN 
YDIKIMETHLRNGVYAVENSWETLVVC-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPDIKIC--TWGTIHFKNFVMQY CVYYKENRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSDG---YPAAPKKK--HYM-GEGDVGGYEVMSE DLFQILVHA ; t23 MIWSECQRMLREH--EDQAIANE-C------------LIAFKVKLVNGHVKQVRNASEDAH--YTFFAGEKGARELIKR-YS-CMEGDGCVNTMAEGR-S ESAVWKFDPFMCQLTHSMQS--------GDL--------------------CSV--PSFLGVCMFQRQVVMKGE--LVLPDSADEGSVSFMYIQHHSFAA KRCLNVLNVGDNEDSVHLRHKQLRV----YG---KIGK-H-VIISVPNAIMGRLNHHEPSAIVVFPLRQ--RGADSKSV------------------GDG FYDK-D-E------DRPGIANAVSVPVA----EQWVGSVRGWYSSEHKYGLANHILI-GPSKLHRTYQTTAKMGANVVKATNRMKRPQPVYPHVMEYANG V----VKPFYEVAESKNEAQGNGEK-PVNVPCVESPDCQYESKHARVNKLMHP-SLYLMCMEAMNKPIMGDWDGNRCRSPLCLLKVIQLDMGVSGKTLGQ -NIVMAKLLGERTWLATSPLQ------IGCDVVAVGKKPESPQ-EFECAADPTVIYFYKNLIIQQAADYVSAVQVKSPQEMRY------QDVNSPSDGDE NGQS--MHIGPYWVCVSSEVVKKSQPD-SVDKRGRTWVAKN-EFLCPPDHMQGEHSIEGAEQKPFFGFAGPFPQPWQFVIVNPQTWAHNIIRLLYCKDV- SCNCVLTCLNWVLCDSH-GDWHSLIADACPCA-HNVAVCGWKRELVPGL---NHSNEHMAKTIYFEPDGH-------DEGKMILNIFGLDWCEVETSHQE -SSSDDNHL-VNMLPFGVSRDDPVGHYMLGLED-AIRLYRPGT---ARALNILF---VNYYSGDFQDPEL-ERSQLYN-VYCEQ----KQDHRNAIRASK NDHKGMAWDTAKD-MEQGPK-MVEHQVLFY-------AEPNMHGQPEVIHIFI-GNDAMCMLIPGMSVMVHHKFMRAACMPFFNKLFMAVRYMRYDYVDN YDIKIDETRYRDGVYARRNSWETANVC-EMSSGRSGAKINHNPRLPEQVT-PPRSLTVSGLKDTM-PGHPDVTEITKTPVIAIV--MWGTINLKNHVMQY CCYYKETRSVR-----------VNNTGGAELPK---------------------YHYQNILVD-ELYSND---YPAAPKKA--QFM-GVGDVGGYEIVCE DIFQILVCD ; t76 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN-HD-VMEEQGGPGTCLAPQQS ADTSWPFVMFTVDLAHRCRG--------GDMPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSIYDLKVSHKMI--QACNPIQN---------DNHAFSD QRCLNDLSQGVVGASFPKMQHGVRH----YK---K--------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR NFIQLS-MVRDTPHRLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELQATMVFVAARMKTDSSMYMSVREILHG ------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKNDLTGDQLVQLAD----NTCAW 
-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED ALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK-----------------------------P---CS-----------------VEEVCYCKVCDSKDVR AVKNAYQDLQVPLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELGW-NLKH---- ---EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV IADKLHEWPTNVN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCA MNITIQCLHTGEGVMCNVKCKEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPY TDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPVGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AHV-PMGDEGRISRAKD EIMYIRDLE ; t53 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN-YD-VMEEQGGPGTCLAPQQS ADTSWPFVMFTVDLAHRCRG--------GDIPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSTYDLKVSHKMI--QACNPIQN---------DNHAFSD QRCLNDLAQGVVGASHPKMQHGVRH----YK---K--------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR NFIQLS-MVRDTPHGLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVREILHG ------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKNDLAGDQLVQLAD----NTCAW -NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED ALDLYTHRPVPDAQCFVSRVARNIPEH-SPCK-----------------------------P---CS-----------------VEEVCYCKVCDSKDVR AVKNAYQDLQVPLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELGW-NLKP---- ---EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV IADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCA MNITIQCLHTGEGVVCNVKCKEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPY TDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AHV-PMGDQGRISRAKD EIMYIRDLE ; t16 
LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN-YD-VMEEQGGPGTCLAPQQS ADTSWPFVMFTVDLAHRCRG--------GDIPQNTGDSMTCPHLAEGLAAVNS---RAFIGVSTYDLKVSHKMI--QACNPIQN---------DNHAFSD QRCLNDLAQGVVGASHPKMQHGVRH----YK---K--------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPAR NFIQLS-MVRDTPHGLPKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVREILHG ------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRCLRYKNDLAGDQLVQLAD----NTCLW -NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPKCAQFYSKSCTHC----------------TM------CSVGSHASEED ALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK-----------------------------P---CS-----------------VEEVCYCKVCDSKDVR AVKNAYQDLQVPLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELGW-NLKP---- ---EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDSI-RRDLCNQ-GCHEENMVWRDDLKDPISTEV IADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQHNVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCA MNITIQCLHTGEGVVCNVKCKEFLQREDEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPY TDAYKKPKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDYNASEQKE--AHV-PMGDQGRISRAKD EIMYIRDLE ; t42 LSSSHEMRVVHHKSKPDEEIADQ-D------------LCAEEMCKGSGHTGRIHSAVEGGI--WTNFHGF---AEMPKN-YD-VMEEQGGPGTCIAPRQS ADTSWPFMVFTVDLAHRCRG--------GDTPTQAGDSMTFPHLAEGLPAVNS---RAFVGVSSYDLRVSHKMI--QACTLIQN---------DNVAFSD QRPLNDLNQGCVGTSHPKMPHGVRH----YK---K--------KVYENEVGDRLDDAEGIDDVGVWMRW--NGVKHAWCHIIGSCPDHLVCSLLVLQPGR NFIQLS-VVHDTPHRLPKMASAAAVFKQTCHLEQVPGTARMWGKSGPRVRLNGHVLK-NQNILHRQYLVDVNLGATMVFVLARMKTDASMYMSHREILHG GTFDAAKPFRQICGTKDEAQPDGRR-PATLLVVLSKDNQGITEHGAVLKHVHP----------LCKKDCNNNRCLRCKNVLAGNQLIQLSD----VTCAW -NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP-------SK-------VEPKCAQFYSKSCTHH----------------MM------CSVGSNASEED ALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK-----------------------------P---CS-----------------VEEVAYCKNCDSKDVR 
AVQNAYQDLAVPLSKLK-APWLSMGHCECWEEDINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDVELCEESDLTVDDFELGW-VLKP---- ---EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQPCHAAHAQDIHTDQ---DRYDVRTFLGHSI-KCHMCNQ-ACHEENIVWRDDLKDPITTEV IAEKLQEWPTNQN-KENVAD----HRQLFH-------ANSSALDKHRHNVMMN-GSACIRTLIIGGSIVGVNVFMMEPCQEFFTDLFMVYEGLQYAVVCA VNIIIQCLHTNEGVVCNVKCKEFLQREEDMKSGLIGIICNHISRMCMHIL-VVCLLTHCGLKAME-IPFDI-TGVVRGPLITEP--LYLII-YVNAVEPY TDAYKKPKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVHVCVVVPHPWTKGKSEILQNMESA-HLYNAVIRDNNASEQKE--AHV-PVGDEGRISRAKN EILYIRDLE ; t24 QSSSHEMRVVHHKSKPDEEITDQ-D------------LCAEEMCKGSGHTGRIHSAVEGGI--WTNFHGF---AEMPKN-YD-VMEEQGGPGTCVAPRQS ADTNWPFMVFPVELAHRCRG--------GDTP--------FPHLAEGLPAVNS---RAFIGVSSYDLKVSHKMI--QACNPIQN---------DNVAFSD QRVLNDLSQGCVGTSHPKMPHGVRH----YK---K--------RVYENEVGDRLDDAEAVDDVGVWMRW--AGVKVAVCHVIGSCPDHLVVSLLVLQPAR NFIQLS-VVHDTPHRLPKMASAAAVFKQTCHLEQVAGTARVWGKSGPRVKLNGHVLK-NQNVLHRQYLVDVDLGATMVFVAARMKTDASMYMSHREILHG GTFDAAKPFRQICGVKDEAQPDGQR-PATLLVVLSKDNVGITEHGAVLKHVHP----------LCKKDCNNNRCLRCKNVLAGNQLVQLAD----ITCAW -NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP-------SK-------VEPKCQQFYSKSCTHC----------------VM------CSVGSNASEED ALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK-----------------------------P---CS-----------------VEEVCYCKNCDSKDVR ATQNAYQELAIPLSKLK-APWLSMGHCECWEEGINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDIELCEENDLTCDDFELGW-VLKP---- ---EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQPCYAAHAQDIHTDQ---DRYDIRTFLEHSI-RCHLCNQ-ACHEENIVWRDDLKDPITTEV IADKLQEWPTNQN-AENVAD----HRQLFH-------ANSSCLDRHRHNVMMN-GSSCIRTLIIGGSIVGVNVFMMGPCQEFFTDLFMVYEGLQYAVVCA INIIIQVLHTNEGVVCNVKCKEFLQREDDMKSGLIGIICNHISRMCMHIL-VHCLLTHCGLKAME-IPFDR-TGVVRGPLIAEP--LYLII-YVNAVEPY TDAYKKPKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVHVCVVVPHPWTKGKSEILQNMESARHLYNAVIRDNNASHQKE--AHV-PAGDQGRISRAKN EILYIRDLE bpp-seq-2.1.0/test/example.fasta000644 000000 000000 00000363436 12147656566 016532 0ustar00rootroot000000 000000 >t73 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDAN--CQEFVGI---RELGKH 
-YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR--------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPY FMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIG H-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDGFFDK-D-Q------SR PDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMT EYANGL----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEG CKCRS---KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPT VIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDDNLQD--MSGVPVTVCTSSVMVRKDMQD-SVDK RGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV-LCNEVLNGLQW QLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLH W-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKP M-ERSQLHS-TYSED----KKDQKVLIHAMK-RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAH FVVFL-ISDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNSWETLCDY -EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQYCLYYEE NKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RH L-GIGDNGGMELVREDIFQILVCV >t66 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPCSGNGTQVRNDVEDAN--CQEFVGI---RELGKH -YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR--------GDIGRVKRDVQKFPKLKEGAPNCSSFVKPY FMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIG H-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDGFFDK-D-Q------SR PDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMT EYANGL----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEG CKCRS---KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPT VIWFYTQPIVYV-ADVFGCTKRKNPQEMRG------NNVMSESLGDDNLQD--MSGVPVTVCTSSVMVRKDMQD-SVDK RGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV-LCNEVLNGLQW QLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLH 
W-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKP M-ERSQLHS-TYSED----KKDQKVLIHAMK-RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAH FVVFL-ISDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNSWETLCDY -EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQYCLYYEE NKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RH L-GIGDNGGMELVREDIFQILVCV >t32 LIVSQIRVMVRDG--IHKAMDEE-V------------LIARRVKPYSGNGTQVRNDVEDAN--CQEFVGI---RELGKH -YK-CMDGFHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR--------GDIGRVKQDVQKFPKLKEGAPNCSSFVKPY FMGCDMFHKQVEYRGTHGLVLDDTWNEESATFPYPQVHSRSDKRCLNGLHQGDHEESVH---HAPRI----MR---LIG H-HLVIIVLMNHDMGRLNHLEPDAVIPSPLRIGGDGWVPKPI------------------EDGFFDK-D-Q------AR PDVANASIVPDK----DQWVGAHNQWGSSLRKVGLAVHDLR-NISDLYRCVWMDCHLGANHYRQISRMKMITPTYTHMT EYANGL----WHPFYKASDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVVKPLLGDWEG CKCRS---KLYVSQLDH----KTLSK-NLGLY--MQKRTWLATAPLQ------IGCMFMLVGRKKNSEE-NYNKAADPT VIWFYTQPIVYV-ADVFGCTKGKNPQEMRG------NNVMSESLGDDNLQD--MSGVPVTVCTSSVMVRKDMQD-SVDK RGCTWNAKE-DHLCPSSFCKGER---EDEPGGVTQ-----------------RCIENIAKLLYIKDV-LCNDVLNLLVW QLCWSV-GDWESLIPQACWDAKKDLAVCAWKMELVPGL---NRNNENLAKVIYFGPDGH-------DEGPMQAKIVTLH W-EMDVSHRG-SSVDDNVV-VVMLAFAVSFCHPWGHYIQGLGD-QHKLARPNT---AQKLSILT---VFHFSGGFRDKP M-ERSQLHS-TYSED----KKDQKVLIHAMK-RNKGLDWHAGND-MKGGPK-IIYLGMLFY-------AENNVAKQKAH FVVFL-LSDATRLLIVHEGCMIDYTFMEFACHPFFAELFMEHMVARYQYYSNVDIKIFDTCFRSAVYAVDNSWETLCDY -EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIAIRVSGSE-SENVVLQYCLYYEE NKSIR-----------QNNTGSADLPK---------------------DRLVNSFVD-ELYNSI---YTAAPKKK--RH L-GIGDNGGMELVREDIFQILVCV >t75 LIVSEIRVMVRDE--VHKAMDEE-C------------LIARRVKPYSGNGNQIRNDIEDAN--GQEFVGI---RELGKH -YD-CMDGYHTVNNGAGEN-SESAMWIFDLWMCKLNHGMQR--------GDIGRVQQDVHKFPKLKEGAPNCSSFVKPV FMGCDMFHRQVQNRGNHGLVLDDTWNEESGTFPYPQVHSRNEKRCINKLHQGDHNESCH---HNPRI----VR---RIG H-HLVIIVLVNRDMGRLNYIEPDALVPSPLRFGGDEWVPKPI------------------EDGFFDK-D-Q------AR 
PDVANA-IVPDR----DQWVGAHKEWGSSLCKVGLAVHDLR-NVSDLYRCVWMECVLGANHYQQVSRMKMVTPAYTHMT EYANGL----WHPFYKNPDHKNEAQGVGEK-PHQLPCVESETCQYETKHAKVCKQVHG-ALYLIGMKTVAKPVLGDWEG CKCRSDLIKLYVSQLDH----KTLSK-NMGLY--MRNRTWLATSPLQ------IGCIFMLVGRKKNSEE-NYNKAADPT VIWFYTQPIVYV-ADVFGCTKGKNPQEMRK------NNVISESLGDDNLQD--MPGMPVTVCTSSVMVRKDMHQ-SVDK RGYTWHAKE-DHLCPDSFCKGEK---EEEPGGVRR-----------------RIVENIVKLLYIKDV-LCNQVLNLLQW QLCWSV-GDWESLVPQACWGAKKDLAVCAWKMELVPGL---NRNNENLAKIIYFGPDGH-------DEGNMQAKIVTLH W-EMDVSHRG-SSVDDNVV-VVMLAFQASFCDPWGHYIQGLGD-QYKLARPNT---ARQLSILV---VMHFSGGFRDKP I-ERSQLHC-TYSEN----KKDQKVVIRAMK-RNKGCDWHAGKD-MAGGPK-IIYVGMLFY-------AESNWAKQKAH FVVFL-ISDATRLLIHHVGCMIEYTFMEFACHPFFAELFMEHMVIRYQYYNNVDIKIFDTCFRGAVYAEDNSWETLCGY -EMLSGYFGAEINHNPRLPEQVC-PPCTITVSGLKGKLHNRWPVLTQITRTMRIQIPIRISGSE-SENVVLQYCLYYEE NKSIR-----------QNNTGSAVLPK---------------------DRLVNNFVD-ELYNSV---YTAAPKKK--RH L-GIGDNGGMELVREDIFQILVCV >t79 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDAA--RPDFVGT---RELGKQ -YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPY FLGVDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG L-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------GDGFFDK-D-Q------AR PLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVM EYAHGL----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGLRTVMKPHLGDVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPT TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVDVCVMSVMVNKSMPQ-SHDK RGYTWQAKQ-DHLFPVNVVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNMVLSLLVW QLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH W-EMEASHRE-SSVRDNMI-VIMLPFNINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGEFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYINNHDIKIKDTVFRLAVYAWDNFWETLCKC 
-EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKE--MN Q-GVGDSGGVEMVQEDVFQILVCI >t27 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ -YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPY FLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG M-NLVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADGFFDK-D-Q------AR PLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVM EYAHGL----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPT TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDK RGYTWQAKQ-DHLFPVNCVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVW QLC--H-GDWEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH W-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGAFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENFWETLCNC -EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKD--MN Q-GVGDSGGVEMVQEDVFQILVCI >t93 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTMSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ -YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPY FLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG M-NLVIISYANVDMGRLNHCEDQAIVYSPLRYGGAGFVSKPI------------------ADGFFDK-D-Q------AR PLMANAVCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVM EYAHGL----WKPFYQASDHKNEAQGVGER-THQLPCVESHTCQYEVKHAKVCKLVHH-ALYLLGIHTVMKPHLGNVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSAN-NYNRAGDPT 
TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDK RGYTWQAKQ-DHLFPVNCVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVW QLC--H-GDWEPLLPQACEGARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH W-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ARRLSILH---VMHYSGAFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-VNKDHDWITGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFSELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENFWETLCNC -EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIRIKIREPGSE-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------ARLVNKVVD-ELYNNV---YHAIPKKD--MN Q-GVGDSGGVEMVQEDVFQILVCI >t59 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQNQVRNAVEDAA--RPDFVGI---RELGKQ -YE-CMDGVGAVDTGAGDN-SESAVVIFDVWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPY FLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG M-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADGFFDK-D-Q------AR PLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECILGANVYHQISRMKMHGPTYSHVM EYAHGM----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNQAGDPT TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDK RGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIDLSLLVW QLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH W-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDHTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENFWETLCNC -EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNV---YHAIPKKD--MN Q-GVGDSGGVEMVQEDVFQILVCI >t7 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKKFSGSQGQVRNAIEDAA--RPDFVGI---RELGKQ 
-YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSMPMPY FLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG M-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADGFFDK-D-Q------AR PLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVM EYAHGM----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNRAGDPT TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDK RGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVW QLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH W-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENFWETLCNC -EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNP---YHAIPKKD--MN Q-GVGDSGGIEMVQEDVFQILVCI >t78 LIVSEVRVMVRDG--LNIAIDEL-C------------LIANRVKTFSGSQGQVRNAIEDAA--RPDFVGI---RELGKQ -YE-CMDGVGAVDTGAGDN-SESAVVIFDIWMCQLPHGMQW--------GDIGRVVVDHPKFLKLEEGAPNCSSIPMPY FLGCDMFHREVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLHQGDHEESRH---HNARI----VR---RIG M-NLVIISYANVDMGRLNHCEDEAIVFSPLRYGGAGFVSKPI------------------ADGFFDK-D-Q------AR PLMANAHCVPEN----DQWTGAVGQWCSSVVKCGLADHPLE-NISRLLRCVVMECVLGANVYHQISRMKMHGPTYSHVM EYAHGM----WKPFYQASDHKNEAQGVGER-THQLPCVDSHTCQYEVKHAQVCKLVHH-ALYLLGLRTVMKPHLGDVDG CRCRSDLNNLILSQLDD----KTLSK-NCFTY--LGNMTWLATSPLH------IGCMIIFVGRKQQSGN-NYNRAGDPT TICFCRKPIVMH-ADVFGCARAKDPQEMRM------NNVISESLGDDSLQQ--MPAVPVTVCVMSHMVKKSMPQ-SHDK RGYTWQAKQ-DHLFPVNFVKGEQ---EEEPEGADQ-----------------RTVHDMVRLLYSKDV-DCNIVLSLLVW QLC--H-GDWEPLLPQACESARRDLAVCAWKRELVPGL---NHNNEFLAKIIYFGPDGH-------DEGPMGQKIVDLH 
W-EMEASHRE-SSVRDNMI-VIMLPFSINSIDPWRHYILGLND-QIQLTRPNT---ACRLSILH---VMHYSGEFHDKS A-ERSQLHS-RYSEH----KNDHKCVILASK-HNKDHDWATGKDEMKGGAK-IMNPGALFY-------AEQNWVRPKNH FVIFL-ICDAVRILILHHGCMTDRTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTVFRLAVYAWENFWETLCNC -EMLSGHVGAKINHNPRLFEQVC-PPVELTVSGLKGKV-NGWPYLTDITRTPLIGIKIREPGSQ-SENTVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNP---YHAIPKKD--MN Q-GVGDSGGIEMVQEDVFQILVCI >t12 LIVSEVRVMVRDG--INIAIDEL-C------------LIANRVKAFSGHQNQVRNAMEDAQ--RPDFVGI---RELGKQ -YQ-CMDGHGAVDTGAGRN-SESAVWIFDIWMCKLAHGMQW--------GDIGRVSVDHPKFLKLEEGAPRCSSCPMPY FLGCEMFHRQVILKGAKGLVLPD--NEDSMAFPYVQVHSRSVKRCLNVLVQGDHDESRH---HNARI----VA---RIG M-NMVIIRYLNVEMGRLNHCEDDAMVYSPLRIGGAGTHSKPI------------------ADGFFDK-D-Q------AR PLIANAHIVPEN----DQWTGAVAQWCSSVIKCGLANHDLE-NISRLLRCVVMECVLGANVYHQASRMKMHGPYYAHVT EYAHGL----WKPFYQTSDHKNEAQGVGES-THQLPCVESQTCQYEVKHAKVCKLVHH-ALYLLALHTMMKPRLGDIDG CRCRSELNKLALSQLDD----KTLSK-NCFQY--LGNMTWLATSPLH------VGCMIIF------------------- -----------------------------V------NNVLSQSLGDDSLQQ--MHAMPVTVVVVSHMVKKSMPQ-SHDK RGYTWQAKDFDQLIPVSFVKGEQ---EEEPEGPNN-----------------RVVHNIVKLLYTKDV-DCNTVLSLLIW QLC--H-GDWEQLVPQACAGARSDLAVCAWKRELVPGL---NHNNENLAKVIYFGPDGH-------DEGPMGRKIVDLH W-EMEASHRD-SSVRDNML-VIMLPFLVNNLDPWRHYILGLND-QIQLTRPNT---ARRLSILM---VMHYSGEFHDKS P-ERSQLHFDSYSEH----KNDHKCVILASK-HNKGLDWATGKDDMKGGAK-IMMSGALFY-------AEQNWIRARNH FVIFL-ICDAVRILIVLHGCMTAWTFMEFACNPFFAELFMEHVYIRYQYVNNHDIKIKDTEFRLAVYAWENSWETLCNC -EMLSGHIGAKINHNPRLFEQVC-PPCELTVSGLKGKH-VGWPYLTDITRTPLIRIQIREPGSE-CENSVVMYVIYYKE RKSIR-----------QNNTGCAKLPQ---------------------HRLVNKVVD-ELYNNV---YHAIPKKD--IN V-GVGDSGGMEVVQEDVFQILVCI >t81 LIVSEVRHMVRDG--ANVAIDEL-C------------LIACRVKAFSGHGHQVRNAVEDAA--RPDFIGI---RELGKP -Y--CMDGHGAVNTGAGHN-SESAVWIFDIWMYKLSHGMQW--------GDIGRVGVDHPKFLKLEEGAPNCSSLPMPY FLGVDMFHRQVTIKGAKGLVLPDIWNESPMAFPYAQVHSKSHKRCLNVLVQGDHEESEH---HNARI----MS---KIG T-HLVIINIINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADGFFDK-D-Q------AR 
PIIANAHIVPVN----DQWTGPMAQWCSSVIKCGLANHELE-DVSRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMT EYAVGL----WKPFYQASEHKNEAQGLGER-PHQLPCVESQTCQYEIKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDG IRCRSELNKLYLSQLDD----KTLSK-NCFIY--LGNMTWLATSPLQ------IGWLVLFDGRKRQSAE-DYNRAADPT TISFCRKPIVQH-ADVFGCDRDKDPQEMRT------NNVISESLGDDFLQQ--MPAVPVSVCVYSHMVKKFVPQ-SHEK RGYTWKAKE-DHLVPISYCKGEH---EEEPEGAQY-----------------RVVQNIVKLLYTKDV-DCNVVLSLLVW QLC--H-GDWEPLIPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEGPMGRKIVDLH W-EMEASHRD-SSVRDNYI-VIMLPFKINSRDPWRHYILGIND-QVCLNRPET---ARRLSILA---VMHYSGEFHDKS P-ERSHLHF-RYSEM----KNDVKCVI---K-SNKGLDWTAGKDGMKGEAK-AMMNGALFY-------AEQNWFRKKNH FVIFV-FCDAVRILIMHSGVMIGYTFMEFACNPFFAELFMEHVMVRYQYESNHDIKIKDTCFRLAVYAWENSWETLCNV -EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGLPYLTDITRTPLIRIQIRVPGSE-SENVVVMYMVYYKE RKSIR-----------QNNTGCANLPQ---------------------HRLVVKVAD-ELYNNI---YHAIPKKD--GV LLGIGDSGGMEMVQEDLFQILVCA >t21 LIVSEVRHMVRDG--ANIAIDEL-C------------LIACRVKAFSGHGNQVRNAVEDAP--RPDFIGV---RELGKP -Y--CMDGHGAVNTGAGVN-SESAVWIFDIWMYKLSHGMQW--------GDIGRVDVDHPKFLKLEEGAPNCSSLPMPY FLGVDMFHKQVTLKGAKGLVLPDIWNESPMAFPYAQVHSKSHKRCLNVLVQGDHEESEH---HNARI----MS---RIG T-HLVIVNVINVEMQRLNHCEDKAIVYSPDRIGGAGFHSKPI------------------ADGFFDK-D-Q------AR PIIANAHIVPIY----DQWTGPMAQWCSSVIKCGLANHELK-VVSRLLRCVGMECNLGANHYHQVSRMKMHGPIYSHMT EYAHGL----WKPFYQASEHKNEAQGVGER-PHQLPCVESQKCQYEVKVAKVCKLVHV-ALYLVGLKTVVKPKLGDWDG VRCRSDLNKLYLSQLDD----KTLSK-NCFRY--LGNMTWLATSPLQ------NGCVVIFAGRKRQSAE-DYNRAADPT TIAFCRKPIVQH-ADVFGCDKDKDPQEMRM------NNVISESLGDDFLQQ--MPAVPVSVCVHSHMVKKFVPQ-SHEK RGVTWKAKE-DHLVPISLCKGEH---EEEPEGAQY-----------------RCVQNIVKLLYTKDV-DCNVVLSLLVW QLC--H-GDWEPLVPQACQTAKKDLAVCAWKRELVPGL---NCNNEYLAKIIYFGPDGH-------DEGPMGRKIIQLH W-EMEASHRD-SSVRDNII-VIMLPFKINSRDPWRHYILGLND-QVCLTRPET---ARRLSILV---VMHYSGEFHDKS P-ERSHLHY-RYSEV----KNDVKCVILASK-SNKGLDWTAGKDGMKGENK-AMMNGTLFY-------AEQNWFRQKNH FVIFV-ICDAVRILIVHSGVMTGFTFMEFACNPFFAELFMEHVYIRYQYQANHDIKIKDTCFRLAVYAWENSWETLCNV 
-EMLSGHFGAKINHVPRLPEQVC-PPCDLTVSGLKGKQ-NGWPYLTDITRTPLIRIQIRVPGSE-SENITVMYMVYYKE RKSIR-----------QNNTGCANLPQ---------------------HRLVVKVAD-ELYNNI---YHAIPKKD--GV LLGIGDSGGMEMAQEDLFQILVCV >t80 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELEKV -YE-CMDGQGAVNTEAGNN-SESAVWIFDIWMCKLTHGMQDFGD-----GDIGRVVVDHPKFEKLEEGAPNCSSHPVPY FLGIDMFHKQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDVEES-H---HSARC----VR---RIG F-HLVIIVFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDGFFDK-D-Q------AR PVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMT EYAHGL----WKPFYQASDHKNDAQGVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQG CRCRSQLSKLVLSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQSHGLNYNRAADPT TICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDDRLQQ--MPAMPVTICVSSYMVKKSVIQ-SHQK RGYTWRAKE-DHLIPVSFCKGEL---DEEPDAAQQ-----------------RIVQNIVKLLYTKDV-SCNVVLSMLIW QIC--H-GDWEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEGPMGRKIVTLH V-EMEVSHRE-SSVKDNMIRVIMLPFSNSSVDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VTHYSGGFHDKS A-ERSQHHF-IYNEV----KDDHKCVILASK-CNKGLDWAAGKD-MKGGAK-VMMNGALFY-------AERNWIRQKHH FVIFV-VCDAVRILIVHYGCMIGYTFMDFACNPFFAELFMDHVFIRYVYMNNHDIKIMDTCFRVAVYTWENSWETCFNR -EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIKIHIRVPGSQ-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDQVVD-ELYSKF---YHAIPKKP--GN M-GVGDSGGMEMVQEDVFQILVCL >t14 LIVSEVRHMVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELEKV -YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCKLTHGMQDFGD-----GDIGRVVCDHPKFEKLEEGAPNCSSHPMPY FLGVDMFHEQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDVEES-H---HSARC----VR---RIG F-HLVIIVYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPV------------------DDGFFDK-D-Q------AR PVIASAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVMIDCNQGANHYIQISRMKVHGPTYSHMT EYAHGL----WKPFYQASDHKSDAQGVGER-PHQLPCVESQTCQYEMKHAKVCKLVHG-ALYLIALKTVVKPKLGVWQG CRCRSQLSKLVLSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCVILFGGRKRQSHGLNYNRAADPT 
TICFYKKPIVVQGADVFGCARGKDPQEMKA------NNVISGSLGDDRLQQ--MPAMPVTICVSSYMVKKSVPQ-SHQK RGYTWRAKE-DHLIPVSFCKGEL---DEEPDAAQQ-----------------RIVQNIVKLLYTKDV-SCNVVLSMLIW QIC--H-GDWEPQIPKACSNARKDLAVCAWKRELVPGL---NHNNENLAKIMYFGHDGH-------DEGPMGRKIVTLH V-EMEVSHRE-SSVKDNMIRVIMLPFPSSSVDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VTHYSGGFHDKS A-ERSQHHF-IYNEV----KDDHKCVILASK-CNKGLDWAAGKD-MKGGAK-IMMNGALFY-------AERNWIRQKHH FVIFV-VCDAVRILIVHYRCMIGYTFMDFACNPFFAELFMDHVFIRYQYMNNHDIKIMDTCFRVAVYTWENSWETCCNR -EMLSGTFGAKISHNPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTEITRTPLIKIHIRVPGSQ-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDQVVD-ELYSKF---YHAIPKKP--GN M-GVGDSGGMEMVQEDVFQILVCL >t85 LIVSEVRHVVRDG--VNIAVDEI-C------------LIANRVKSMSGQGNQVRNAMEMAA--RQNFVGM---RELDKA -YQ-CMDGQKAVNTEAGNN-SESAVWIFDICMCKLTHGMQD--------GDIGRVVCDHPKFEKLEEGAPNCSSIPMPY FLGIDMFHKQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---VSARC----MR---RIG F-HLVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------NDGFFDK-D-Q------AR PVIANAHIVPEH----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVEIDCNQGANHYVQISRMKMYGPTYSHMT EYAHGL----WKPFYQASDHKNDAQGVGER-PHQLPCVESQTCQYEVKHANVCKLVHA-ALYLIGLKTVVKPELGVWEG CRCRSQLSKLALSQLDE----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIILFGGRKRQSHGLNYNRAADPT MICFYKKPIVVQGADIFGCARGKDPQEMKS------NNVISGSLGDDRLQH--MPAMPVTICVFSYMVKKAVPQ-SHQK RGYTWRAKE-DHLIPVSFCKGEL---DEEPDGAQQ-----------------RVVQNIVKLLYTKDV-SCNKVLSMLIW QIC--H-GDWEPQIPKACNAARKDLAVCAWKRELVPGL---NHNNEDLAKIMYFGPDGH-------DEGPMGRKIVNLH V-EMEVSHRE-SSVKDNMIRVVMLPFDNNSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDKS S-ERSQHHF-IYNEV----KNDHKCVILASK-SNKGLDWAAGKD-MKGGAK-MMANGALFY-------EERNWIRQKNH FVIFV-VCDAVRILIVHYGCMVPYTFMAFACNPFFAELFMEHVFIRYQYVNNHDIKIMDTCFRVAVYTWENSWETCCNR -EMLSGTFGAKISHNPRLPEQVI-PPCDLTVSGLKGKH-NGWPYLTDITRTPLIKIHIRIPGSQ-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GN M-GIGDSGGMEMVQEDLFQILVCI >t62 LIVSEVRVMVRDG--IHIAVDEI-C------------LIANRVKCMSGQGNQARNAMEMAA--RQNFVGM---RELGKQ 
-YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPY FLGIDMFHNQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIG Y-HRVIINYWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADGFFDK-D-Q------AR PVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMT EYAHGL----WKPFYQASDHKNDAQGVGER-PHQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQTVVKPKLGVWEG VQCRSQLSKLILSQLDD----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIIMFDGRKRQSHGLQYNRAADPT TICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDDRLQQ--MPAMPVTICVFSYMVKKSVPQ-SHVK RGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAQH-----------------RMVQNIVKLLYTKDV-SCNVVLSMLIW QIC--H-GDWEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLH V-EMEASHRE-SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-KIKLTRPNT---AHRLSILV---VMHYSGGFHDKS A-ERSQHHF-IYNEI----KNDHKCVILASK-ANKPLDWAAGKD-MKGGAK-MMANGALFY-------AERNWIRQKNH FVIFI-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPNHDIKIMDTCFRVAVYTWENSWETCCNR -EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDRVVD-ELYSKF---YHAIPKKP--GN M-DVGDSGGMEMVMEDVFQILVCV >t19 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKCVSGQGNQARNAMEMAA--RQNFVGM---RELGKQ -YQ-CMDGQGAVNTEAGNN-SESAVWIFDIWMCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPY FLGIDMFHKQVMGKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIG YGHLVIINFWNVEMGRLNHCEDEAIIYSPLRIGGAGFHSKPI------------------ADGFFDK-D-Q------AR PVIANAHIVPEY----DQWSGAMAQWCSSVIKCGLANHDLE-NVSRLLRCVVIDCNHGANHYVQISRMKMHGPTYSHMT EYAHGL----WKPFYQASDHKNDAQGVGER-THQLPCVEPQTCQYEVKHAKVCKLVHG-ALYLIGLQNVVKPKLGVWEG VQCRSQLSKLILSQLDD----KTLSK-NCSIY--LGNMTWLATSPLQ------IGCIILFDGRKRQSHGLQYNRAADPT TICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDDRLQQ--MPAMPVTICVFSYMVKKLVPQ-SHQK RGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAKQ-----------------RMVQNIVKLLYTKDV-SCNVVLSMLIW QIC--H-GDWEPQIPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLH 
V-EMEASHRE-SSVKDNMIRVIMLPFSNNSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDKS A-ERSQHHF-IYNEI----KNDHKCVILASK-ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNH FVIFV-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVFIRYQYIPNHDIKIMDTCFRVAVYTWENSWETCCNR -EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GN M-DVGDSGGMEMVMEDVFQILVCV >t77 LIVSEVRHMVRDG--INIAVDEI-C------------LIANRVKSMSGQGNQARNAMEMAA--RQNFVGM---RELGKQ -YH-CMDGQGTVNTEAGNN-SESAVWIFDIWLCRLTHGMQD--------GDIGRVECDHPKFAKLEEGAPNCSSLPMPY FLGIDMFHRQVMAKGSKGLILPDTWNEASMAFPYPQVHSKSHRRVLNVLHQGDHEES-H---HSARC----MR---RIG Y-VLVIINFWNVEMGRLIHCEDEAIIYSPLRIGGAGFHSKPI------------------ADGFFDK-D-Q------AR PVIANAHIVPEY----DQWSGAMPQWVSSVIKCGLANHDLE-NVSRLLRCVVIDVNQGANHYVQISRMKMHGPTYSHMT EYAHGL----WKPFYQASDHKNDAQGVGER-VHQLPCVEPQTCQYEVKHAKVCKHVHG-ALYLIGLQTVVKPKLGVWEG CQCRSQLSKLILSQLDD----KTLSK-NCNIY--LGNMTWLATFPLQ------IGCIILFDGRKRQSYGLQYNRAADPT TICFYKKPIVVQGADIFGCARGKDPQEMKA------NNVTSGSLGDDRLQQ--MPAMPVTICVFSYMVKKSVPQ-SHQK RGYTWRAKE-DHLIPVSFCKGEL---EEEPEGAQQ-----------------RIVQNIVKLLYTKDI-SCNVVLSMLIW QIC--H-GDWEPQLPKACDAARKDLAVCAWKRELVPGL---NHNDENLAKIMYFGPDGH-------DEGPMGEKIVNLH V-EMEASHRE-SSVKDNMIRVIMLPFGENSWDPWRHVILGLND-QIKLTRPNT---AHRLSILV---VMHYSGGFHDNS A-ERSQHHF-IYNEV----KNDHKCVILASK-ANKGLDWAAGKD-MKGGAK-MMVNGALFY-------AERNWIRQKNH FVIFV-ICDAVRILIVHYGCMVGDTFMDFACNPFFAELFMEHVVIRYQYIPNHDIKIMDTCFRVAVYTWENSWETCCNR -EMLSGTFGARISHVPRLPEQVI-PPCELTVSGLKGKH-NGWPYLTDITRTPLIRIHIRVPGSE-SENHVVMYCVYYKE HKSIR-----------QNNTGCANLPQ---------------------HRLVDKVVD-ELYSKF---YHAIPKKP--GN M-DVGDSGGMEMVMEDVFQILVCA >t88 LIVSECRLIIRDG--NHDAIDEM-CCVANDLNNEIERLVASMVKSFRGHDSQARNNSECMR--SAPFIGV---RELFKR -YHKCVEGAGCVHTVAGTP-SDSPVWMFDQFMCQLTHSMVD--------GDLGRVVQDNVIFAKLKEGAPHCISL--PH FLGIDMFHTQVFVGGH--LILPDPCYELSISVMYAGHASYNQKRCINNLDQGDQEDSNHRKEHKIRASVLLYR---QIG I-L-VIIKEANELMNRLNHKEPENGIIFPLR---DAQDPKQI------------------LNGLFDK-E-E------NR 
PMVQDADSVVGS----AQWAGQHRSWCSSDDKA-----------SQLPRNTHIVVEIGANVYEQFSRMKTNIPIYAHVT EYAVGV----ERPFYE-SEFKNEAQGWGES-GTSIPCVDSPDVQYEMKVAWVDKLMHT-ALYLMPLATVHKPEMGTVRG ERCRAIL-KLLMMQLD------TLSR-NQLPK--LCQGTWLDASPLQ------IGVQVMLVGKKGGSKK-EYELAADQV IIYFYQAPIIYVKADVFSGTVAKKAQAMR-------KSTGSQSIGDDGMQS--MPLMQNAVCVWSKMVRKVQPD-GQDK REQTWMAKD-DTLCPPSMKRGEK---TAEPTQWMG-----------------TVTVNKIKLLYCKDC-SCNEVMKILSW WLCNSV-GDWQTLMSQACITADPNPPVCVWKRELVPGL---NRSVENLAKIIYFCPDEH-------DERKMWGKIFALE W-EMDISHRH-SSVDDNHC-VEMLPFMCQRVDPWGHYVQILAD-RQDLARPVT---LQALSILP---CPHASGKEQDGA V-ERSQHYV-VYAEL----QVDHKCTIAAHKYQDKMKDWDTGKN-MDEGAK-MYVEIHLFY-------AENNIRRQNDH SVIFW-ISDAKKRLICGMSSMCLVTFMTLACNPLFAKLFMEVVPMRYDYLTNHDIKI------MEVYAPENSWETIKNV -EMVSGKEGAEINHTPQLPEQVI-PPCNLTVSNLKGMI-PPCPALTEITRTV--FIV--MYYAI-LRNKIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGG---YHAAPKKE--EM V-GPGDNGGVEDIKEDMFQVLVCR >t37 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVT EYAVGV----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDHKSTIDANKNDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEH SVTFV-ISDAQKRLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV 
-EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-SPGDNGGVEAIREDMFQVLVCK >t35 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTVIPIYARVT EYAVGV----PRPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPQMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDHKSTIDANKNDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEH SVTFV-ISDAQKRLICGVSSMAMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-SPGDNGGVEAIREDMFQVLVCK >t48 LIVSDMRLIIREG--SDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEVGANVYEQYSRMKTVIPIYARVT EYAVGV----PRPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV 
IIYFYQAPIIYHKADVFAGTVAKKSQAMRS------RSIGSQSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDHKSTIDANKNDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLIKQNEH SVTFV-ISDAQKRLICGVSSMVMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-SPGDNGGVEAIREDMFQVLVCK >t55 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAI--PY FMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVT EYAVGV----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLSTVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWMAKD-DTLCPPCEEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRH-SSVDDNQC-VELLPYNCQRMDPWGHYVQILAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDHKSTIDANKNDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNLRKQNEH SVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-SPGDNGGVEAIREDMFQVLVCK >t46 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR 
-YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVHDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFVGGN--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKEPKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVT EYAVGV----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVKVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIHHKADVFAGTVAKKSQAMRS------RSIGSQSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWMAKD-DALCPPCKEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQSLMNDACRSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRH-SSVDDNQC-VELLPYVCQRMDPWGHYVQVLAD-RLDLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDHKSTIDANKNDNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEH SVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPDNSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPCPALTEITRTP--FIM--MYVAM-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-APGDNGGVEAIREDMFQVLVCK >t67 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVMDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFLGGD--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPRKEHKIRNSVLLYR---QIG L-L-VIIKEANELMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVT EYAVGV----ARPFYD-AEFKNEAQRQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDDGMQN--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWVAKD-DTLCPPCKEQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE 
W-EMDISHRM-SSVDDNQC-VELLPYVCQRMDPWGHYVQILAD-RLVLTRPVT---LHGLSILP---CPHASGKEQDGA E-ERSQHYG-VYQEL----QNDAKSTIDANKNPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKLNEH SVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-APGDNGGVEAIREDMFQVLVCK >t57 LIVSDMRLIIREG--NDDAIDEM-CCVANELNNVIERLVASMVKEFRGHDCQARNNSECIT--AAAFIGT---RELFKR -YPKCMEGARCVHTVAGPP-SESPVWMFDKFMCQLTHSMVN--------GDLGRVVMDNWTFTKLKEGAPHCIAL--PY FMGIDMFHIQVFIGGD--LILPDPCYELSISVMYAGHASYNQKRCINGLDNGDRQDSDPQKEHKIRNSVLLYR---QIG L-L-VIIKEANEIMGRLNHKERKNGVIFPLR---GAQGPKQI------------------SNGLFEK-E-E------NR PMVQDAGSVPGR----KQWAGQHRAWCSSDDKA-----------SQLPRNTRIVAEIGANVYEQYSRMKTNIPIYARVT EYAVGV----ARPFYD-AEFKNEAQGQGES-GTSIPCVDSPDIQYEMKHAWVEKLMHS-ALYLMPLATVHKPEMGTVKG ERCRTIL-KLLMVQLDE----KTLSQ-NQIPD--LCQRTWLDASPLQ------IGVQVVLVGKKRGSKK-DYELAADPV IIYFYQAPIIHHKADVFAGTVAKKAQAMRS------RSIGSPSVGDDGMQK--MPLMQNAVCVWSQMVRKVQPD-GQDK REQTWVAKD-DTLCPPCKDQGEK---AAEPTQWVG-----------------TCTSNVIKLLYCKDC-SCNEVLKILAW WLCDSV-GDWQTLMNDACSSANPNYPVCIWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERRMWGKIFALE W-EMDISHRV-SSVDDNQC-VELLPYLCQRMDPWGHYVQILAD-RLNLTRPVT---LQGLSILP---CPHASGKEQDGA V-ERSQHYG-VYQEL----QNDAKSTIDANKNPNKMLDWDTGKN-MDKGAK-MFYKIVLFY-------AENNIRKQNEH SVTFV-ISDAQKRLICGVSSMCMRTFMTLACNPFFAKLFMEVVPMSYDYVPNHDIKI------IEVYAPENSWETTKVV -EMVSGKSGAQINHTPQLPEQVI-PPCCLTVSNLKGMI-PPVPALTEITRTP--FIV--MYVAI-LRNDIMQYCFYYKE NRSVR-----------LNNTGRAELPK---------------------HHMQNIFID-ELYPGP---YHAAPKKE--EL M-APGDNGGVEAIREDMFQVLVCK >t56 LIVSEWRLFIRDG--HDDAIDEM-CCEANELNNDIEKLVASMVKGFRGHDSQARNNAECII--AAPFIGV---RELFKR -YIKCVEGAGCVYTVA-AP-SESPVWMFDKFMCHLTHSMVV--------GDLGRVLRDNTTFAKLKEGAPVCISL--PY FLGMDMFHQQVFMGGN--LILPDPCYELSISVMYAGHASYNQKRCLNNLDQGDREDSSHRKEHKIRRSVLLYQ---QIG C-L-VITRERNELMGRLNHKEPRDGVIFPHR---GAQGPKQL------------------ANGLFEK-E-E------NR 
PMVQDAGSVPER----AQWPGQQRAWCSSDDKA-----------SKLPRNTKMVAEIGANVYEQASRMKTNIPIYAHVT EYALGV----DRPFYD-SEFKNEAQGQGES-GTSIPCVDSPDVQYELKHAWVDKLMHT-ALYLMPLPTVHKPKMGTVKG ERCRAML-KLNMMQLDE----KTLSQ-NQIIK--LCQRTWLDASPLQ------IGVNCVLPGKKGGSNK-DYELAADPV IIYFYQAPIIHHKADVFSGTVAKKAQAMRQ------QSTGSQSVGDDGTQV--MPLMQNLVCVWSKMVRKCMID-GQEK REQTWMAKD-DKLCPPSQEQGEK---AAEPTQWED-----------------ICTANVIKLLYCKDC-SCNEVLRVLSW WLCDSV-GDWRTLMSDACALANPNPPVCVWKRELVPGL---NRNVENLAKIIYFCPDEH-------DERKMWGKIFCLE W-EMDISHRH-SSVDDNVC-VEMLPFVCQKMDPWGHYVQILAD-RLDLTRPVT---LQGLGILV---CPHASGKEQDGA M-ERSQHYV-VYAEL----QNDHKNTINANKVPRKMLDWDTGKN-MDKGAKGMYHEIVLFF-------AENNVKKQVEH SVIFI-IADAQKRLICGVSSMCLQTFMNLACNPFFAKLFMEVIPMRYDYQTNHDIKI------KEVYAPENSWETIKNV -EMVSGKAGKQINHEPQLPEQVI-PPCSLTVSNLKGMM-PPCPAMTEITRTV--III--MYYAI-LRNEIMQYCFYYKE NRSVR-----------INNTGQAELPK---------------------HHVQNVFID-ELYPGH---YHAAPKKN--EN V-GPGDNGGVEGIKEDMFQVLVCQ >t13 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECAI--PPPFHGV---REMFKR VYE-CMEGIGCVNTVAGNP-SESSAWMFDKVMYQLTHSMVW--------GDLRRVVHDNVTFSKLKEGAPHCISH--PY FLGIDMFHIQVYSKGF--LTLPDPRYEISMSVMYSQHHSFSMKRCLNGLDHGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKDANSMMGRLNHHESRAVVAFPLR---GAEGPKQV------------------MEGLFDK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-NEWKQEAQGQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKG NRCRAIL-KLSMIQLDQ----KTLSQ-NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVGSKK-DYEVAADPV IMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKD-DTLCPQSRDQGEK---HHEPHHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMNKIFALE W-EMDMSHRV-SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTRPHT---LRDLYILAC--CPVASGKEQDGN K-ERSQHYT-IYAEI----QNDHKNPISANNQPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSNHSIKI------FEVYADENSWENVRNF 
-EMVSGRAGAHINHIPQLPEQAY-PPCKLTVSNLKGVA-ASCPAITEITRTA--VIV--MYGSI-LRNDVMQYVFYYRE NRSVR-----------QNNTGNAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKD--RF C-NPGDNGGMETYQEDMFQVLVCV >t5 GIVSECRMIIRDE--HDDAIDEM-C------------LVASMVKKLSGCENQARNNHECAI--PPPFHGV---REMFKR VYE-CMEGIGCVNTVAGNP-SESSAWMFDKVMYQLTHSMVW--------GDLRRVVHDNVTFSKLKEGAPHCISH--PY FLGIDMFHIQVYSKGF--LTLPDPRYEISMSVMYSQHHSFSMKRCLNGLDHGDREESPHQIEHKMRKSVLIYS---PIG Y-L-VIIKDANSMMGRLNHHESRAIVAFPLR---GAEGPKQV------------------MEGLFDK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-NEWKQEAQGQGEA-GACIPCVDSKDVQYELKHAYVKKLMHT-SLYLMHIETCHKPVMGTVKG NRCRAIL-KLSMIQLDQ----KTLSQ-NQRAK--LCQRTWLDTSPLQ------MGMTLVLVGKKVGSKK-DYEVAADPV IMYFYDAPMIMRPTDVFEGTNNKKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKD-DTLCPQSRDQGEK---HHEPHHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLSSDACMHAEQNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMNKIFALE W-EMDMSHRV-SSVDDNYI-IEMLPFVCDRVGPWGHYAQVLAD-QLHLTRPHT---LRDLYILAC--CPVASGKEQDGN K-ERSQHYT-IYAEI----QNDHKNPISANNQPNKMLDWDTGKT-MDRGAA-MCGEIKLFY-------AEPNVWNQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFSKLFMEVVCMRYNYTSNHSIKI------FEVYADENSWENVRNF -EMVSGRAGAHINHIPQLPEQAY-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYGSI-LRNDVMQYVFYYRE NRSVR-----------QNNTGNAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKD--RF C-NPGDNGGMDTYQEDMFQVLVCV >t38 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR -YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISQ--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHHIDHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHHESRLIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTIKG NRCRAIL-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPV 
IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKSQPD-GQDK RGYTWVAKE-DDLCPQSGDQGEK---YAEPQHIRN-----------------QFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA Q-ERSQCYT-IYAEL----QNDHKSLISANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNKLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENSWENIRNF -EMVSGRAGAYINHIPQLPEQAV-PPCHLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQYVFYYRE NRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RF C-NVGDNGGMETIQEDMFQVLVCI >t33 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR -YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHYESRAIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SHLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHN-SLYLMHIDTCHKPAMGTVKG NRCRAIL-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPV IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEPNTPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA K-ERSQCYT-IYAEL----QNDHKSLISANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENSWENIRNF -EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQYVFYYRE NRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RF C-NPGDNGGMETIQEDMFQVLVCV >t100 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR 
-YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKG NRCRAIL-KLTMIQLDQ----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLVGKKVGSKK-DYEIAADPV IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKE-DTLCPQSEDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRVGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA Q-ERSQCYT-IYAEL----QNDHKSLISANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------FEVYADENSWENIRNF -EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAITEITRTA--VIV--MYVSI-LRNDVMQYVFYYRE NRIVR-----------QNNTGVAELPK---------------------HHMQDVFID-ELYLGP---YHASPKKE--RF C-NPGDNGGMETIQEDMFQVLVCV >t34 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR -YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKG NRCRAIL-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPV IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE 
W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA Q-ERSQCYT-IYAEL----QNDHKSLISANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENSWENIRNF -EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRYVFYYRE NRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKQ--RF C-NPGENGGMETIREDMFQVLVCV >t84 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCDDQARNNHECAI--SHPFHGV---REMFKR -YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKG NRCRAIL-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPV IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEHNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA K-ERSQCYT-IYAEL----QNDHKSLIAANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENSWENIRNF -EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRYVFYYRE NRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKE--RF C-NPGENGGMETIREDMFQVLVCV >t92 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEDQARNNHECAI--SHPFHGV---REMFKR -YE-CMEGIGCVNTVAGNP-SESSAWMFDKFMYQLTHSMVW--------GDLGRVYYDNITFSKLKEGAPHCISH--PF FLGIDMFHIQVYFKGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNGLDVGDREESPHQIEHKMRKSVLIYN---PIG Y-L-VIIKNANSLMGRLNHHESRAIVTFPLR---GAEGPKQV------------------MEGLFLK-E-E------AR 
PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIFAHVT EYAVGI----VKPFYD-SEWKQEAQGQGEA-GACIPCVDSPDVQYELKHAYVKKLVHT-SLYLMHIDTCHKPVMGTVKG NRCRAIL-KLTMIQLDV----KTLSQ-NQRAR--LCQRTWLDTSPLQ------MGMTIVLMGKKVGSKK-DYEIAADPV IMYFYQAPMIMRPTDVFEGTNNRKAQAMRS------RSTASQSIGDDDMLN--MPLTQNAMCVESEMVRKCQPD-GQDK RGYTWVAKE-DTLCPQSGDQGEK---YAEPQHIRN-----------------KFSENVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDACVSAEPNPPVCVWQRELVPQL---NRNAEDLARIIYFGPDEH-------DEGKMMCKIFALE W-EMDMSHRM-SSVDDNYI-IEMLPFVCDRIGPWGHYAQILAD-QLNLTRPHT---LRELYILA---CPVASGKEQDGA Q-ERSQCYT-IYAEL----QNDHKSLISANHVPNKMLDWDTGKT-MDRGAA-MCAEIKLFY-------AEPNVWKQNDH SVMFI-ISDAQNRLIVANSIMVGQTFMAMACNPHFAKLFMEVVVMRYDYCSNHSIKI------LEVYADENSWENIRNF -EMVSGRAGAHINHIPQLPEQAC-PPCVLTVSNLKGVA-ASCPAMTEITRTA--VIV--MYVSI-LRNDVMRYVFYYRE NRIAR-----------QNNTGVAELPK---------------------HHMQDIFID-ELYLGP---YHASPKKE--RF C-NPGENGGMETIREDMFQVLVCV >t3 GIVSECRMIIRDE--SDDAIDEM-C------------LVASMVKKLSGCEQQARNNRECAT--SIPFLGV---RELWKR -YE-CMEGIGCVNTVAGKP-SQSSVWMFDRFMYKLTHSMVW--------GDLGRVYWDNLTFQKLKEGAPHCIAV--PN YLNIDMFHIQVFYKGP--LTLPDPHYELSMSVMYAQHHSFSQKRCLNALDHGDREESPHQIEHKMRKSVLLYN---PIG Y-L-VIIKNANSMMGRLNHHESCAIVMFPLR---GAEGPKQI------------------IEGLFDK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SRLPRNDPVVHEVGANVYQQISRMKTSIPIYAHVT EYAVGI----IKPFYD-NEWKQEAQGQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLGTCHKPVMGTVKG DRCRAIL-KLSMIQLDQ----KTLSQ-NVRAR--LCQRTWLDTSPLQ------MGMVAVLLGKKLGSKK-DYEIAADPV IKYFYQAPIIMRRTDVFEGTDDQKAQAMRV------RSTASQSIGDDDMLI--MPLVQNAMCVKSEMVRKCQPD-GPDK RGYTWMAKD-DTLCPVSAGQGEK---YAEPHQIKN-----------------KFSVNVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWVTLASDACLSAEPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEGKMMPKIFKLA W-EMDMSHRA-SSVDDNYV-IEMLPFVCHRVGPWGHYAQVQAD-QQNLTRPHT---LADLFILS---CPVASGKEQDGA T-ERSQYYV-VYAEL----QNDHKSPISANKHPDKMLDWDTGKS-MDRGAA-MCREIKLFY-------AEPNVWKQNDH SVVFI-ISDAVNRLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYDYFSNHSIKI------FEVYAEDDSWENICNF 
-EMMSGRTGAQINHIPQLPEQVC-PPCHLTVSNLKGCC-ASVPAITEITRTV--VIC--MYVSI-LCNVVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGQ---YHASPKKK--KI C-GPGDNGGMETIEEDMFQVLVCV >t22 GIVSECRVIIRDQ--SDDAIDER-Y------------LVASVVKRLSGCENQARNNRECAI--SIPFLGV---RELWKR -YE-CMEGIGCVNTVAGTP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVYVDNQTFSKLKEGAPHCISA--PY YLNIDMFHIQVFYKGS--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDYGDREESPHQIEHKMRKSVLLYY---TIG W-L-VIIKNADSMMGRLNHHESCAMVMFPLR---GAEGPKQI------------------MEGLFDK-E-E------AR PMVQDAASVPGR----AQWVGRVRAWCSSDVKA-----------SQLPTNDPIVPEVGANVYQQVSRMKTSIPIYAHVT EYAVGI----VKPFYD-NEWKQEAQGQGEA-GASIPCVNSPDVQYELKHAHIKKLMHT-SLYLMHLKTCHKPVMGTVKG DRCRAIL-KLKMIQLDQ----KTLSQ-NQAAR--LCKRTWLDTSPLQ------MGMNVVLIGQKLGSKH-DYEIAADPV IMYFYQAPIIMRRTDVFEGTDDQKAQAMRL------RSTASRSIGDDDMLI--MPLVQNAMCVNPEMVRKCQPD-GQDK RGYTWMAKN-DTLCPVSAGQGEC---YAEPHQTCN-----------------KFRVNVIKLLYCKDC-SCNKVLKILKW QLCESV-DDWQTLASDACILAKPQPPVCVWQRELVPQL---NRNVEDLARVIYFGPDEH-------DEGKMMSKIFNLE W-EMDMSHRT-SSVDDNYV-IDMLPFICHRVGPWGHYAQVLAD-QINLTRPHT---LRDLFILA---CPVASGKEQDGA T-ERSQHYV-VYAEL----QNDHKSPISANKHPNKLLDWDTGKT-MDRGAA-MCHEIKLFH-------AEPNVWRQNDH SVMFI-ISDASNRLICANSIMCGLTFMAMACNPHFAKLFMEVVCMRYEYFSNHSIKI------FEVYAEENSWENVCNF -EMMSGRAGAQINHIPQLPEQVC-PPCVLTVSNLKGCC-ASCPANTEITRTV--VIC--MYVSI-LRNVVMQYVFYYKE NRSVR-----------QNNTGAAELPK---------------------HHMQDIFID-ELYPGL---YHASPKKK--KV C-GPGDNGGMETIQEDMFQVLVCV >t64 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAIT-SPPFIGV---RELFKR -YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNATFSKLKEGAPHVISH--PY FLGIDMFHDQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIG F-L-VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEGHFDK-E-E------AR PIVQDAASVPGK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVT EYAVGI----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAPSKK-DYEIAADPV 
IMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDDDMLK--MPLVQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSANQGEK---YAEPHVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILVW QLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALE W-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGN T-ERSQHYI-VYAEL----QNDHKSPISANKHLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDH SVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAAENSWENARNF -EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIVID-ELYPGP---YHATPKKH--RF C-GTGDNGGMQAIQEDMFQVLVCV >t18 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAIT-SPPFIGV---RELFKR -YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNATFSKLKEGAPHVISH--PY FLGIDMFHEQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIG F-L-VIIKNINSMMGRLNHHESRAVVGFPLR---GAENPKQI------------------MEGHFDK-E-E------AR PIVQDAASVPGK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQLSRMKTNIPIYAHVT EYAVGI----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLKGKKAPSKK-DYEIAADPV IMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDDDMLK--MPLVQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSANQGEK---YAEPHVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILIW QLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMFKIFALE W-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGN T-ERSQHYI-VYAEL----QNDHKSPISANKHLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDH SVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAAENSWENARNF -EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGP---YHATPKKH--RF C-GTGDNGGMQAIQEDMFQVLVCV >t68 GIVSECRMIIRDE--TDDAIDEV-C------------LVASMVKKLSGCENQTRNNRECAI--SPPFIGV---RELFKR 
-YG-CVEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLRRVFPDNAHFSKLKEGAPHVISH--PY FLGIDMFHDQVVYRGP--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDTQESPHGIEHNMRNSVLLYN---PIG F-L-VIIKNINSMMGRLNHYESRAVVGFPLR---GAENPKQI------------------MEGHFDK-E-E------AR PIVQDAASVPEK----AQWVGRVRAWCSSDVKA-----------SKLPRNDPIVPEVGANVYQQISRMKTNIPIYAHVT EYAVGI----VKPFYD-NEWKQEAQGVGEA-GASIPCVDSPDVQYELKHAEVRKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLPMIQLDQ----KTLSH-NQPRR--LCQRTWLDASPLQ------VGMNIMLMGKKAPSKK-NYEIAADPV IMYFYQAPIIMRRTDVFEGTHNKKAQAMRV------RSTASQSIGDDDMLK--MPLRQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSANQGEK---YAEPVVIGD-----------------KVSVNVIKLLYCKDC-SCNQVLKILVW QLCESV-DDWQTLTSDPCMSAQVNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALE W-EMDMSHRV-SSVDDNFV-IEMLPFVCHRIGPWGHYAQMLAD-QIVLTRPHT---LRDLFILS---CPVASGGEQDGN T-ERSQHYI-VYAEL----QNDHKSPISANKHLNKMLDWDTGKT-MDREAT-MCREMKLFY-------AETNLWKQNDH SVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVVCVHYDYGANHDIKI------FEVYAVENSWENARNF -EMMSGRAGAEINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGP---DHATPKKH--RF C-GTGDNGGMQTIQEDMFQVLVCV >t28 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGV---RELFKR -YH-CMEGIGCVNTVAGMP-SESSVWMFDQFMYKLTHSMIW--------GDLGRVFPDNATFSKLKEGAPHVISH--PY FLGIDMFHAQVFYRGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESSHGIEHNMRSSVLLYN---PIG F-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------VEGHFDK-E-E------AR PIVKDAASVPGK----AQWVGRIRAWCSSDVKA-----------SQLPRNDPIVPEVGANVYQQDSRMKTNIPIYAHVT EYACGI----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLSMIQLDE----KTLSQ-NQRPG--LCQRTWLDANPLQ------MGMNTMLVGKKTPSKK-DYEIAADPV IMYFYQAPIIMRRTDVFEGTNNKKSQAMRV------QSTSSQSIGDDDMLN--MPLVQNAMCVSSEMVRKCQPD-GQNK RGCTWMAKN-DTLCPQSGNQGEK---FAEPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKMLKILVW QLCESV-DDWQTLASDPCVSAEPNSPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMSKIFALE 
W-EMDMSHRI-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLSD-QIVLTRPHT---LRDLFILA---CRVASGGEVDGN T-ERSQHYI-VYAEL----QNDHKSPISANKVLNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDH SVMFI-ISDAQNRLICGNSIMCGITFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYAAENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYVSI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RY C-GPGDNGGMQVIEEDMFQVLVYV >t82 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGL---RELFKR -YV-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNCTFSKLKEGAPHVISH--PY FLGIDMFHGQVVYRGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIG F-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEGHFDK-E-E------AR PIVQDAASVPGE----AQWVGHIRAWCSSDIKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTT EYACGI----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLNMIQLDQ----KTLSQ-NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTPSKK-DYEIAADPV IMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDDDMLN--MPLVQNAMCVSSEMVRKCQPD-GQNK RGYTWMAKH-DTLCPQSGNQGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEGKMMSKIYALE W-EMDMSHRV-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTRPHT---LRDLFILA---CPVASGGEQDGN A-ERSQHYI-VYAEL----VNDHKSPISANKVLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDH SVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYADENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYISI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RF C-GPGDNGGMQVIEEDMFQVLVYV >t41 GIVSECRMIIRDE--ADDAIDEM-C------------LVASMVKKLSGCENQARNNRECAI--SPPFIGL---RELFKR -YV-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNCTFSKLKEGAPHVISH--PY FLGIDMFHGQVVYRGS--LTLPDPRYELSMSVMYSQHHSFSQKRCLNPLDHGDRQESPHGIEHNMRSSVLLYN---PIG F-L-VIIDNINPMMGRLNHHESRAIVTFPLR---GAEHPKQI------------------MEGHFDK-E-E------AR 
PIVQDAASVPGE----AQWVGHIRAWCSSDIKA-----------SQLPRNDPIVPEVGANVYQQVSRMKTNIPIYAHTT EYACGI----VKPFYE-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHANVKKLMHD-SLYLMHLETCHKPIMGTVKG DRCRAIL-KLNMIQLDQ----KTLSQ-NQRPG--LCQRTWLDASPLQ------MGMHIMLVGKKTPSKK-DYEIAADPV IMYFYQAPIIMQRTDVFEGTNNKKSQAMRV------HSTSSQSIGDDDMLN--MPLVQNAMCVSSEMVRKCQPD-GQNK RGYTWMAKH-DTLCPQSGNQGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLASDPCMSAEPNPPVCVWQRELVPQL---NRNVEDLARTIYFGPDEH-------DEGKMMSKIYALE W-EMDMSHRV-SSVDYNYV-IEMLPFVYHRVGPWGHYAQMLAD-QIVLTRPHT---LRDLFILA---CPVASGGEQDGN A-ERSQHYI-VYAEL----VNDHKSPISANKVLNKMLDWDTGKT-MDREAA-MCRDIKLFY-------AETNVWKQNDH SVMFI-ISDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEVACMRYDYFANPDIKI------FEVYADENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPDITEITRTV--VIC--MYISI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RF C-GPGDNGGMQVIEEDMFQVLVYV >t71 GIVSECRMIIRDQ--SDDAIDEMVV------------LVASMVKEMSGCENQARNNSECAI--SPQFIGV---RELFKR -YR-CMEGIGCVNTVAGAP-SESSVWMFDRFMYKLTHSMVW--------GDLARVFHDNGTFAKLKEGAPHVISV--PY FLGIDMFHGQVFLRGS--LTLPDPRYELSMSVMYAQHHSFSQKRCLNHLDHGDRQESPHGIEHNMRKSVLLYN---PQG Y-L-VIIKNVNSMMGRLNHHESRAVVTFPLR---GADNPRQI------------------MEGHFDK-E-E------AR PVVQDAPGVPGK----AQWVGKVRAWCSSDVKA-----------SQLPRNDTIVPEVGANVYQQVSRVKTHIPIYAHVT EYAVGI----VKPFYN-NEWKQEAQGQGEA-GASIPCVDSPDDQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKG GRCRASL-KLKMIQLDQ----KTLSQ-NQQRR--LCQRTWLDSSPLQ------MGMKIMLQGKKTPSKK-DYEIAADPA IMHFYRAPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDDDMLN--MPLHQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSPNQGEK---YAEPHQIRN-----------------KISVNVIKLLYCKDC-SCNQVLKILVW QLCESV-DDWQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALE W-EMDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDLFILA---CPVASGGEQDGN T-ERSQHYI-VYAEL----QNDHKSPISANKHMNKMLDWDTGKT-MDFEAA-MCREIKLFY-------AETNVWKINDH SVMFI-VSDAQNRLICGNSIMCGLTFMAMACNPHFAKLFMEGVCMRYDYSANHDIKI------FEVYADENSWENVRNF 
-EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV--VIV--MWISI-LRNDVMQYVVYYKE NRSVR-----------QNNTGKAELPR---------------------HPMQEIFID-ELYPGH---YHATPKKN--RF C-GPGDNGGMQSMQEDVFQVLVCV >t94 GIVSECRMIIRDQ--SDDAIDEMVC------------LVASMVKELSGCENQARNNRECAI--SPPFTGV---RELFKR -YR-CMEGIGCVNTVAGIP-SESSVWMFDRFMYRLTHSMVW--------GDLGRVFQDNATFSKLKEGAPHVISH--PY FLGIDMFHGQVFYRGS--LTLPDPRYELSMSVMYAQHHSMSQKRCLNTLDHGDRQESPHGIEHNMRKSVLLYD---PIG Y-L-VIIKNVNSMMGRLNHHESRAKVTFPLR---GADNPRQI------------------MEGHFDK-E-E------AR PVVQDAPGVPEQ----AQWVGKMRAWCSSDVKA-----------SQLPRNDTIVPEVGANVYQQVSRVKTNIPIYAHVT EYAVGI----VKPFYN-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLPHLETCHKPIMGTVKG GRCRASL-KLNMIQLDQ----KTLSQ-NQQRR--LCQRTWLDASPLQ------MGMKIMLQGKKIPSKK-DYEIAADPA IMYFYQATIIMRRTDVFEGTNNKKAQAMRM------RSTASQSIGDDDMLN--MPLHQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLYPQSANQGEK---YAEPHQIRN-----------------KISVNVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLTSDPCVPAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALE W-EMDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-KLVLTRPHT---LRDLFILA---CYVASGGEQDGN T-ERSQHYI-VYAEL----QNDHKSPILANKVVNKMLDWDMGKT-MDFEAA-MCREIKLFY-------AETNVWKQNNH SVMFI-VSDAHNRLICGNSIMCALTFMAMACNPHFAKLFMEAVCMRYDYSANHDIKI------VEVYADENSWENVRNF -EMMSGRAGAQINH-PQLPEQVCNPPCHLTVSNLKGVC-ASCPAITEITRTV--VIV--MWISI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPR---------------------HPVQDIFID-ELYPGH---YHATPKKV--RF C-GPGDNGGMQAIQEDVFQVLVCV >t99 GIVSEVRMIIRDE--SDDAIDEM-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR -YG-CMEGIGCVNTVAGIP-SESSIWMFDRFMYKLTHSMVW--------GDLGQVFPDVSTFSKLKEGAPHVISQ--PY FLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIG Y-L-VIIKNINSMMGRLNHHESRAVFAFPLR---GAENPKQI------------------MEGHFDK-EKE------AR PVVQDAPGVPGK----AQWVGRIRAWCSSDVKA-----------SELPRNDAIVPEVGANVYQQVSRMKTNIPIYAHAT EYAVGI----VKPFYD-NEWKQEAQGQGET-GASIPCVDSPDVQYEMKHAEVKKLMHD-SLYLMHIETCHKPIMGTVKG DRCRATL-KLKMIQLDQ----KTLSQ-NQKRR--LCQRTWLDASPLQ------MGMKMMLQGKKTPSKK-DYEIAADPV 
IMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDDEMLN--MPLVQNAMCVPSEMVRKCQPD-GQDK RGYTWMAKD-DPLCPQSANVGEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVW QLCDSVQDDWQPLTSDPCVNAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMNKIFALE W-EMDMSHRDSSSVDDNYI-IEMLPFVCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDIFILA---CPVASGGEQDGN T-ERSQHYI-VYAEL----QDDHKSPISANKHMRKMLDWDTGKT-MDREAA-VCREIKLFY-------AETNCWKQNDH SVMFI-VSDAQNRLICGSSIMCRLTFMAMAVNPHFAKLFMEDVCMRYDYSPNHDIKI------FEVYADENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNDVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGR---YHATPKKN--RY C-GPGDNGGMQPIQEDMFQVLVCV >t40 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR -YG-CMEGIGCVNTVAGIP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVYPDNPTFSKLKEGAPHVISH--PY FLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---SIG Y-I-VIIKNLNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEGHFDK-E-E------AR PVVQDAPGVPGK----AQWVGRVRAWCSSEIKT-----------SELPRNDAIVVEVGANVYQQISRMKTDIPIYAHAT EYAVGI----VKPVYD-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKG DRCRATL-KLKMIQLDQ----KTLSQ-NQKKR--LCQCTWLDACPLQ------MGMKIMLQGKKTPSKK-DYEIAADPV IMYFYQSPIIMRRTDVFEGTNNKKAQAMRI------RSTASQSIGDDEMLN--MPQVQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSANVGEK---YAEPHQIRN-----------------KVSTNVIKLLYCKDC-SCNKVLKILVW QLCDSV-DDWQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALE W-EMDMSHRN-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRNIFILA---CPVASGGEQDEN T-ERSQHYI-VYAEL----QNDHKSPIHANKVMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDH SVMFIVISDAQNRLICGSSIMCRLTFMAMAVNPHFAKLFMEESCVRYDYSANHDIKI------FEVYANENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV--VIC--MYISI-LRNNVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YQATPKKD--KF C-GPGDNGGMQTIQEDMFQVLVCV >t90 GIVSECRMIIRDE--SDDAIDEV-C------------LVASMVKALSGCENQARNNRECAI--SPPFIGV---RELFKR 
-YG-CMEGIGCVNTVAGIP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVYPDNPTFSKLKEGAPHVISH--PY FLGIDMFHDQVFYRGS--LTLPDPRYELSMSVMYAQHHSFSQKRCLNALDHGDRQESPHGIEHNMRKSVLLDS---PIG Y-I-VIIKNVNSMMGRLNHHESRAVVTFPLR---GAEDPKQI------------------MEGHFDK-E-E------AR PVVQDAPGVPGK----AQWVGRVRAWCSSEIKT-----------SELPRNDAIVPEVGANVYQQISRMKTDIPIYAHAT EYAVGI----VKPVYD-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLMHLETCHKPIMGTVKG DRCRATL-KLKMIQLDQ----KTLSQ-NQKKR--LCQCTWLDACPLQ------LGMKIMLQGKKTPSKK-DYEIAADPV IMYFYQSPIIMRRTDVFEGTNNKKAQAMRV------RSTASQSIGDDEMLN--MPLVQNAMCVSSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQSANQGEK---YAEPHQIRN-----------------KVSTNVIKLLYCKDC-SCNKVLKILVW QLCDSV-DDWQTLTSDPCVSAEPNPPVCVWQRELVPQL---NRNVEDLARIIYFGPDEH-------DEGKMMRKIFALE W-EMDMSHRN-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRNIFILA---CPVASGGEQDEN T-ERSQHYI-VYAEL----QNDHKSPINANKVMNKMLDWDTGKT-MDREAA-MCREIKLFV-------AETNVWKQNDH SVMFIVISDAQNRLICGSSIMCGLTFMAMAVNPHFAKLFMEESCVRYDYSRNHDIKI------FEVYANENSWENVRNF -EMMSGRAGAQINH-PQLPEQVC-PPCHLTVSNLKGVC-ASVPAITEITRTV--VIC--MYISI-LRNNVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKD--KF C-GPGDNGGMQTIQEDMFQVLVCV >t4 GIVSECRMIIRDE--PDDAIDEV-C------------LVASMVKELSGCENQARNNRECAI--SPPFIGV---RELFKR -YG-CMEGIGCVNTVAGMP-SESSVWMFDRFMYKLTHSMVW--------GDLGRVFPDNATFSKLKEGAPHVISV--PY FLGIDMFHDQVFYRGS--LTLPDPRYEMSMSVMYAQHHSFSQKRCLNALDYGDRQESPHGIEHNMRKNVLLDN---PIG H-L-VIIKNENSMMGRLNHHESRAVVTFPLR---GAEDPKQK------------------MEGSFDK-E-E------AR PVVQDAPGVPGR----AQWVGRVRPWCSSDVKA-----------SELPRNDAIWPEVGANVYQQISRMKTNIPIYPHVT EYAVGI----VKPFYL-NEWKQEAQGQGEA-GASIPCVDSPDVQYELKHAEVKKLMHD-SLYLWHLETCVKPIMGTVKG DRCRATL-KLKMIVLDP----KTLSQ-NQKRR--LCQRTWLDASPLQ------VGMKIMLQGKKTPSKK-DYEIAADPV IMYFYQSPIIMRHTDVFEGTNNKKAQAMRL------RSTASQSIGDDEMLN--MPLIQNAMCVQSEMVRKCQPD-GQDK RGYTWMAKD-DTLCPQLHNQAEK---YAEPHQIRN-----------------KVSVNVIKLLYCKDC-SCNKVLKILVW QLCESV-DDWQTLTSDPCISAEPNPPVCVWQRELVPQL---NRNIEDLARIIYFGPDEH-------DEGKMMGKIFALE 
W-EHDMSHRD-SSVDDNYV-IEMLPFMCHRVGPWGHYAQQLAD-RLVLTRPHT---LRDIFILA---CPVASGGEQDAV T-ERSQHYI-VYAEL----QNDHKSPISASKHMNKMLDWDTGKT-MDREAA-MCREIKLFY-------AETNVWKQNDH SVMFI-ISDAQNRLICGSSIMCGLTFMATACNPHFAKLFMEEVCMRYDYAANHDIKI------SEVYAEENSWENVRNF -EMMSGRAGAQINH-PQLPEQVV-PPCHLTVSNLKGVC-ASCPAITEITRTV--VIC--MYVSI-LRNVVMQYVFYYKE NRSVR-----------QNNTGKAELPK---------------------HHMQDIFID-ELYPGH---YHATPKKN--RC C-GPGDNGGMQTVQEDMFQVLVCV >t36 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYVLQVRNALEDAL--YVFFTGTKGTRELVKY -YM-CMEGHGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMEG--------GDL--------------------CSY--PS FLGMCMFHAMVQSKGD--LVLPDSPNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSEHIKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------DR PGIQNAVSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINIMSKPAMGEWVG NRCRNELTALRIIQLDVGVSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVMSVEKKEASPK-EFEVAADPT VIYFYRNLIIQKITDVLSAVRMESPQEMRT------QDVNSSSLGDNNGQA--MNIVPYWVCVASGVVRKTHKD-SVDK RGQTWTAKS-DFLCPLAVDQGEP---GAEQKPAVGEENPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLYADSCPIA-VNAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEGKVMLKIFGLD WCEVERSHEH-SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIRPGT---ARSLTILF-YGCQYYSGEFQDCE I-ERSQLYN-VYCEH----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-IILHGSLFF-------AEPNIVRQPGV SHIFI-GNDARRVLICGKSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMMNYDIKIYETHWRMGVYALDNSWETLNVS -EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIKIY--MWGTINFKNRVMQYCNYYKE NRSVR-----------INNTGLAELPK---------------------YHHQNILID-ELYSNV---YPAAPKKH--QY M-GVGDVGGYEVICENLFQILVVE >t87 MIWSEVRKMVRER--REQAIDNK-C------------LIAARVKMMSGYMLQVRNALEDAL--YVFFTGNKGTRELVKY -YM-CMEGHGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMEG--------GDL--------------------CSY--PS FLGMCMFHAMVQSKGD--LVLPDSPNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHVKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNHIEPGAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------DR 
PGIQNAMSVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYRINRKVGANAYDQDSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNKPCVESEDCQYEKKHASMDKLMHQ-SLYLMHINVMSKPAMGEWVG NRCRNDLTALRIIQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCLVMSVEKKEASGK-EFEVAADPT VIYFYRNLIIAKITDVLSAVRMESPQEMRE------QDVNSSSLGDNNGQA--MNIVPYWVCVGSGVVRKTHKD-SVDK RGQTWTAKS-DFLCPLAMDQGEI---GAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLYADSCPIA-INAAVCGWKRELVPGL---NHSCEHLAKSVYFEPDGE-------GEGKVMLKIFGLD WCEVERSHEH-SSVDDNYN-VNMLPFQNSRKDPVGHYVQDLED-ARRLIRPGT---ARSLTILF-YGCQYYSGEFQDCE I-ERSQLYN-VYCEH----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-VILHGTLFF-------AESNIVRQPGV SHIFI-GNDARRVLICGLSMMPGHRFMREACVPFFHKLFMAVNQMRYDYMVNYDIKIYETHWRMGVYALDNSWETLNVS -EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIKIY--MWGTICFKNRVMQYCNYYKE NRSVR-----------INNTGLAELPK---------------------YHHQNILID-ELYSNV---YPAAPKKH--QY M-GVGDVGGYEMICENLFQILVVE >t89 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY -YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PS FLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------ER PGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVG VRCRNELTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPT VIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDNNGQA--MNIVPYWVCVASGVVRKMHKD-SVEK RGQTWEAKS-DFLCPLAVDQGEP---AAEQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLE WCEVERSHEH-SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCE V-ERSQCYN-VYCEL----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGV SHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTNYDIKIYETHWRVGVYAIDNSWETLNVS 
-EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQYCNYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KF L-GVGDVGGYEIICENLFQILVVE >t39 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY -YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PS FLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------ER PGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVG VRCRNELTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPT VIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDNNGQA--MNIVPYWVCVASGVVRKMHKD-SVEK RGQTWEAKS-DFLCPLAVDQGEP---AAEQRPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLE WCEVERSHEH-SSIDDNYT-VNMLEFPNSRKDPVGHYVQNLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCE V-ERSQCYN-VYCEL----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGV SHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYVTNYDIKIYETHWRVGVYAIDNSWETLNVS -EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQYCNYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KF L-GVGDVGGYEIICENLFQILVVE >t8 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY -YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSY--PS FLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------ER PGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKINRKVGANAYDQVSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHVNIMSKPAMGEWVG VRCRNELTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPT 
VIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDNNGQA--MNIVPYWVCVASGVVRKMHKD-SVEK RGQTWEAKS-DFLCPLAVDQGEP---AAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLE WCEVERSHEH-SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCE V-ERSQCYN-VYCEL----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGV SHIFT-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAANQMRYDYVTNYDIKIYETHWRVGVYAIDNSWETLNVS -EMTSGRMGAKINHLPRLPEHVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQYCNYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNDI---YPAAPKKH--KF L-GVGDVGGYEIICENLFQILVVE >t6 MIWSEHRKMVREG--REQAIDNK-C------------LIAKRVKVMSGYILQVRNAPEDAL--YNFFTGHKGTRELVKY -YM-CMEGEGVVNTMAKKS-SQSGIWKFNAFMC-LMRLMQG--------GDL--------------------CSH--PS FLGMCMFYAMVQSKGD--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNPLNVGDREDSDHTKEKHMRI----YA---NIG G-H-IIIREWNDVMGRLNVIEPEAEVIFPLRK--RGQHSKPV------------------IDGFFAK-D-E------ER PGIQNAMNVPCG----DQWVGSIRGWCSSQHRYGLANHILVHEPSKLHKTYKVVRKVGANAYDQVSRMKAAAPVYPHIM EYAHGM----FNPFYGLSEPKNNAQGNGEN-PMNVPCVESEDCQYEKKHASMTKLMHQ-SLYLMHINIMSKPAMGEWVG VRCRNELTALRIVQLDVGHSGKTLGQ-NIGVSKLLNDRTWLATSPLE------IGCGVISVEKKEASPK-EFEVAADPT VIYFYRSLIIVHITDVLSAVRMDSPQEMRN------QDVNSPSLGDNNGQA--MNIVPYWVCVASGVVRKMHKD-SVEK RGQTWEAKS-DFLCPLAVDQGEP---AAEQKPAVGEKNPICKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFPDSCPFA-LNAAVCGWKRELVPGL---NHSCEHLAKGVYFEPDGE-------DEGKMMLKIFGLE WCEVERSHER-SSIDDNYT-VNMLPFPNSRKDPVGHYVQDLED-AIRLIRPGT---ARSLTILL-YGCQYYSGEFQDCE V-ERSQCYN-IYCEL----KQDHKSAIIANKQEQKGMDWNTGKE-MEQGPK-IILHGCLFF-------AEPNIVRQPGV SHIFI-ENDARRMLICGNSMMPRHRFMREACVPFFHKLFMAVNQMRYDYMTNYDIKIYETHWRVGVYAIDNSWETLNVS -EMTSGRMGAKINHLPRLPEQVI-PPCMLTVSGLKDTM-AGMKQETEITKTPDIEIY--MWGTINFKNQVMQYCNYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYNNI---YPAAPKKH--KF L-GVGDVGGYEIICENLFQILVVE >t58 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDAL--YNFFTGHKGTRELVKY 
-YM-CMEGEGCVNTMAKKS-SQSGIWKFNAFMC-LMHLMQG--------GDL--------------------CSH--PS FLGMCMFHAMVKSKGN--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNHLNVGDREDSEHIKEKHMRV----YA---SIG G-H-IIIREWNDVMGRLNHIEPGAEVTFPLRR--RGQASKPV------------------IDGFFAK-D-E------DR PGIQNAMSVPCG----DQWVGSVRGWCSSQHRYGLAIHILVHQQSRLHKTYNIDRKNGANAYEQDSRMKAGAPVYPHIM EYAHGM----FNPFYGMSEPKNNAQGNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVG NRCRNELTALHVVQLDVGFSGKTLGQ-NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEASPK-EFEVAADPT VIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDNNGQA--MNIVPYWVCVVSGVVRKTHKD-SVEK RGQTWTAKS-DFLCPIAVNQGEP---GAEQKPAVGA-NPIVKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFADSCPTA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEGKMMLKIFGLD WCEVERSHEH-SSVDDNYN-VNMLPFPNSRQDPVGHYVQDLED-LLRLIRPGT---ARSLTVLF-YGCQYYSGQFQDCE I-ERSQLYN-VYCEL----KQDHKSAIMANKQEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGV SHIFV-GNDARRMLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMSNYDIKIYETHWRPGVYALDNSWETLNVS -EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPDIKIH--MWGTINFKNKVMQYCTYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYSNI---YPATPKKH--QY M-GVGDVGGYEVICENLFQILVVE >t54 MIWSEYRHMVREG--RDQAIDNK-C------------LIAARVKVMSGYVLQVRNAPEDAL--YNFFTGHKGTRELVKY -YM-CMEGEGCVHTMAKKS-SQSGIWKFNAFMC-LMHLMQG--------GDL--------------------CSH--PS FLGMCMFHAMVQSKGN--LVLPDSCNEDSVSFMYIQHHSFHEKRCLNHLNVGDREDSEHIKEKHMRI----YA---SIG G-H-IIIREWNEVMGRLNHIEVGAEVTFPLRR--RGQASKPV------------------IDGFFAK-D-E------DR PGIQNAMSVPCG----EQWVGSVRGWCSSQHRYGLAIHILVHQQSRLHKTYNIDRKNGANAYDQDSRMKAGAPVYPHIM EYAHGM----FVPFYGMSEPKNNAQGNGEN-PMNVPCVESDDCQYEKKHASMDKQMHQ-SLYLMHMNIMSKPAMGEWVG NRCRNELTALHIVQLDVGFSGKTLGQ-NIGISELLNDRTWLATSPLE------IGCGVMAVEKKEASPK-EFEVAADPT VIYFYRNLIIQHITDVLSAVRMDSPQEMRV------QDVNSPSLGDNNGQA--MNIVPYWVCVVSGVVRKTHKD-SVEK RGQTWTAKS-DFLCPIAVNQGEP---GAEQKPAVGA-DPIVKPWQFVIMNTTTFAKNIFRLLYVKDV-SCNGVLPLLKW QLCDSH-GDWQSLFADSCPIA-VNAAVCAWKRELVPGL---NHSCEHLAKSVYFEPDGE-------DEGKMMLKIFGLD 
WCEVERSHEH-SSVDDNYN-VNMLPFPNSRKDPVGHYVQDLED-LLRLIRPGT---ARSLTVLF-YGCQYYSGEFQDCE I-ERSQLYN-VYCEL----KQDHKSAIMANKQEQKGMDWNTGKE-MEQGPK-IILHGVLFF-------AEPNVVRQPGV SHIFV-GNDARRMLICGLSMMPGHRFMKEACVPFFHKLFMAVRRMRYDYMTNYDIKIYETHWRPGVYALDNSWETLNVS -EMTSGRIGAKINHLPRLPETVI-PPCVLTVSGLKDTM-AGMKHGTEITKTPDIKIH--MWGTINFKNKVMQYCTYYKE NRSVR-----------INNTGGAELPK---------------------YHHQNILID-ELYSNI---YPATPKKH--QY M-GVGDVGGYEVICENLFQILVVE >t65 MIWSEYRQMVREG--NDQSIDNK-C------------LIADRVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PS FLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRK--GGQDSKPF------------------IDGFF------------DR PGIANAMSVKCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYEHKHAAMEKLMHH-SLYLMRVNTMSKPVMGDWVG NRCRAELTALQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD-- -IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDDDPQM--MNIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPAAVNEGEH---GAEQRP-FGTQNPVCKPWRFVIMNTQTFAKNIIRLLYIKDI-SCNQVLQLLDW QLCDSH-GDWQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMTMKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDYKSAIIANQQEHKGMEWDTGKE-MQQGPK-VVVHLALFYAPSNVLAAEPNIIGQPQV SHVFV-GNDARRMLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYMWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDV---YPAAQKKY--QV M-GVGDIGGYELICEDLFQILVCQ >t29 MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNC--PS FLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLFVGDSQDSQHQREKQLRI----YG---RIG K-V-VIIKVWNAAMGRLNHVEPPAEVMFPLRR--GGSDSKPF------------------LDGFF------------DR 
PGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIM EYAHGM----VKPFYGQSEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMPKPVMGDWDG NRCRAELTALQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD-- -IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPVLVCIQKVDSVSLGDDDPQM--MNIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTHNPVCKPWRFVIMHTQTFSKNIIRLLYIKDI-SCNQVLQLLDW QLCDSH-GDWQSLMADSCPNA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMSQKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELEDSALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDYKSAIIANQQDHKGMEWDTGKE-MQQGPK-VVVHLALFYMPSNVLAAEPNIIGQPQV SHWFV-GNDARRMLIVGVSMMPAYKFMREACVPFFRKLFMADNQVRYDYMWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTV-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDL---YPAAPKKY--QE M-GVGDIGGYELIVEDLFQILVCE >t98 MIWSEHRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTKELVKF -YF-CMEGCGCVNTMAKKS-SQSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PP FLGMCMFHPQVQMKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHNPSRLHRTYAINKKMGANVYQQSSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALQTVQLDVGSSEKTLGQ-NIVTSKRLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-EVEVAAD-- -IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSASLGDDDPQM--MKIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNTQTFSKNIIRLLYIKDI-SCNQVLQLLDW QLCDSH-GDWQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMTMKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE MFERSQLYR-VYCEI----KKDYKSAIIANQQEHKGMEWDTGKE-MQQGPK-IVVHLGLFYMPSNVLAAEPNIIGQPQV SHVFV-ANDARRMLIVGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHFRMGVYAVDNSWETLVYC 
-EMTSGRIGAKINHLPRLPEQVT-PPCDLTVSGLKRTV-AGPPDETEITKSPVIHIV--MWADIVFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNLLTD-ELYSDI---YPAAPKKY--QV M-GVGDIGGYELIVEDLFQILVCE >t83 MIWSEYRQMVREG--NDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTKELIKF -YF-CMEGCGTVNTMAKDS-SQSAPWKFNRFMC-LMHLMQS--------GDL--------------------CNS--PS FLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRR--GGSCSKPF------------------VDGFF------------DR PGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILIHNPSRLHRTYAINKKMGANVYQQTSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALQTVQLDLGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGIMVQEKKIASPQ-EVEVAAD-- -IYFYRNMVVQRHTDVVAAVRMQSPQEMRNPVLVCIQKVDSISLGDDDPQM--MNIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGTQNPVCRPWRFVIMNMQTFSKNIIRLLYIKDI-SCNQVLQLLDW QLCDSH-GDWQSLMADSCPPA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMTMKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQELED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSD MFERSQLYR-VYCEI----KKDYKSAIIANQQEHKGMEWDTGKE-MQQGPQ-IEVHLALFYMPSNVLPAEPNIIGQPQV SHVFV-GNDARRMLIVGVSLMPAYKFMREACVPFFRKLFMAENQVRYDYMWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGAKINHLPRLPEQVT-PPCILTVSGLKRTI-AGAPDETEITKSPVIHIV--MWADIMFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNLLTD-ELYSDI---YPAAPKKY--QV M-GVGDIGGYELIVEDLFQILVCE >t95 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQL--------GDL--------------------CNP--PS FLGMCMFHPQVRSKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYTGDSQDSQHLREKQLRI----YG---RIG R-V-VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKPV------------------YDGFF------------DR PAIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKVGANVYQQTSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG HRCRAELTELQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAASPLQ------IGCGVMVQEKKIASPQ-QVEVAAD-- 
-IYFYRNMVVQRLTDVVAAVRMQSPQEMRCRKLVCIQKVDSPSLGDDDPQM--MNIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGGQNPACKPWHFVIMNRQTFAKNMIRLLYIKDI-SCNEVLQLLDW QLCDSH-GDWQSLIADSCPNA-SNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLD WCEVEKSHEK-SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALIILF-YGCQYYSGKFQDSE LFERSQLYR-MYCEI----KKDHKSAIIANQQEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQV SHVFV-GNDARRMLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHYRMGVYAVDNSWETLVYC -EMTSGRIGAKVNHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPIIHII--MWEEIMFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QV M-GVGDIGGYELICEDLFQILVCE >t20 MIWSEYRQMVREG--TDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELIKF -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQL--------GDL--------------------CNP--PS FLGMCMFHPQVRSKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDSQDSQHLREKQLRI----YG---RIG R-V-VILQVWNAVMGRLNHHEPPAEVMFPLRK--GGSNSKAV------------------YDGFF------------DR PGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKVGANVYQQMSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPSVESGECVYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG HRCRAELTELQTVQLDVGSSGKTLGQ-NIVTSKKLGDRTWLAAPPLQ------IGCDVMVQEKKIASPQ-QVEVAAD-- -IYFYRNMVVQRLTDVVAAVRMQSPQEMRTPKLVCIQKVDSPSLGDDDPQC--MNIVPYWVCVCSGVVKKAEPD-SVDK RGKTWVAKS-DFLCPPAVNEGEH---GAEQRPVFGGQNPACQPWHFVIMNTQTFAKNMIRLLYIKDI-SCNQVLQLLDW QLCDSH-GDWQSLIADSCPNA-VNIAVCCWKRELMPGL---NFSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLD WCEVEKSHEK-SSVDDNYD-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALIILF-YGCQYYSGKFQDSE LFERSQLYR-MYCEI----KKDHKSAIIANQQEHKGMEWDTGKE-MQQGPK-VVLHHALFYAPSNVLAAEPNIIGQPQV SHVFV-GDDARRMLITGVSMMPAYKFMREACVPFFRKLFMAENQVRYDYVWNYDIKIYETHYRMGVYAVDNSWETLVYC -EMTSGRIGAKINHLPRLPEQVT-PPWVLTVSGLKRTV-AGAPDETEITKSPIIHII--MWEDIMFKNCVMQYCTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QV M-GVGDIGGYELICEDLFQILVCE >t69 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGHKGTRELVKY 
-YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMHLMQE--------GDL--------------------CNS--PG FLGMCMFHPQVQRKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIG K-V-VIIKVVNAVMGRLNHVEPSAEVMFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWVGSIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALKTGQLDVGSSGKTLGQ-NIMTSKKLGDRTWLAANPLQ------IGRGVMVWEKKVASPQ-EVEVAAD-- -IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDDVPQM--MNIVPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNTETFAKNIIRLLYVKDV-SCNQVLQLLDW QLCDSH-GDWQSLVADSCPNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMIVKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHNSAIIANQQEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQV SVVFV-GNDGRRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYAWNYNIKIYETHFRDGVYAVDNSWETLVYV -EMASGRIGAKINHQPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQYCNYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QI M-GVGDIGGYELICEDLFQILVCE >t2 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELVKY -YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMYLMQD--------GDL--------------------CNS--PG FLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIG K-V-VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALKTGQLDVGSSGKTLGQ-NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVPSPQ-EVEVAAD-- -IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDDVPQM--MNIVPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV-SCNQVLQLLDW QLCDSH-GDWQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMMVKIFNLD 
WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHNSAIIANQQEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQV SVVFV-GNDARRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWNYNIKIYETHFRVGVYAVDNSWETLVYV -EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQYCNYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QI M-GVGDIGGYELICEDLFQILVCE >t10 MIWSDYRQMVREG--GDDSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YNFFTGQKGTRELVKY -YH-CMEGCGCVNTMAKSS-SQSAPWKFNRFMC-LMYLMQD--------GDL--------------------CNS--PG FLGMCMFHPQVQAKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRM----YG---EIG K-V-VIIKVVNAVMGRLNHHEPSAEVMFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWMGSIKGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQISRMKALAPLYPHIM EYAHGM----VKPFYGISEPKNEAQGNGEN-PMNVPCVESGECQYENKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALKTGQLDVGSSGKTLGQ-NIMVSKKLGDRTWLAASPLQ------IGCGVMVWEKKVPSPQ-EVEVAAD-- -IYFYRNMVIECLTDVVRAVRMQSPQEMRAPVLVCIQKVDSPSLGDDVPQM--MNIVPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAMNEGEH---GAEQRPVFGTQNPVCKPWRFVIMNVETFAKNIIRLLYVKDV-SCNQVLQLLDW QLCDSH-GDWQSMVADSCHNA-VNIAVCCWKRELMPGL---NHSCQHLAKTVYFKPDGE-------DEGQMMVKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFMQSHGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHNSAIIANQQEHKGMDWDTGKE-MQQGPK-IVVVLALFYQPSNVLAAEPNIMGQPQV SVVFV-GNDARRMLIVGCSMMPDYKFMREACVPFFRKLFMAEVQDRYDYNWNYNIKIYETHFRVGVYAVDNSWETLVYV -EMASGRIGAKINHMPRLPEQVI-PPCILTVSGLKRTI-ADEPDETEITKTPIIHIV--MWADIMFKNCVMQYCNYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDI---YPAAPKKY--QI M-GVGDIGGYELICEDLFQILVCE >t31 MIWSEYRSMVREG--ADQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY -YF-CMEGCGCVNTMAKQS-SQSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNL--PS FLGMCMFHNQVQCKGE--LVLPDNVNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YQ---NIG K-V-VIIKVWNANMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDGFF------------DR 
PGIANAMNVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQTSRMKAAAPIYPHIM EYAHGM----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVMGDWVG NRCRAELTALKTVQLDVGSNGKTLGK-NIMTPKKLGDRTWLAASPLQ------IGCGVMVQEKKVASPQ-EVE-AAD-- -IFFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQYVDSPSLGDDTPQM--MNIAPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKA-DFLCPPAVSEGEH---GAEQRPVFGMQNPACKPWRFVIMNTQTFAKNIIRLLYIKDV-SCNAVLQLLDW QLCYSH-GDWQSLIADSCPTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIMKIFNLD WCEVEKSHEK-SSVDDNYH-VNMLPFAQSNRDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSAIIANQHEVKGMDWNTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQV SHVFV-GNDARRMLIVGVSVMPSYKFMREACVPFFHKLFMADNQVRHEYMWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGANINHLPRLPEQVV-PPCILTVSGLKRTI-AGSPDETEITKTLIYHIV--MWADIMFKNCVMQYSTYYKE NRSVR-----------ANNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRV M-GVGDIGGYEMICEDLFQILVCE >t15 MIWSEYRSMVREG--SDQSIDNH-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PS FLGMCMFHNQVPNKGA--LVLPDNVNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YP---SIG K-V-VIIKVWNAAMGRLNHHEPPAEVMFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWVGCIRGWCSSQHQYGLANHILVHGPSKLHRTYAINRKMGANVYQQTSRMKAAAPLYPHIM EYAHGM----VEPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPVLGDWVG NRCRAELTALKTVQLDIGNSGKTLGQ-NIMTPKRLGERTWLAASPLQ------IGCGVVVQEKKVASPQ-EVEVAAD-- -IYFYRNMVIQTLTDVVAAVRMQSPQEMRPPVLVCIQDVDSVSLGDDTPQM--MNIAPYWVCVCSGVVKKTQPD-SVDK RGKTWVAKS-DFLCPPAVSEGEH---GAEQRPVFGMRNPICKPWRFVIMNTQTFAKNIIRLLYVKDV-SCNAVLQLLDW QLCYSH-GDWQSLIADSCSTA-ANIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIVKIFHLD WCEVEKSHEK-SSVDDNYH-VNMLPFAQSNGDPVGHYVQGLED-ALRLIRPGT---ARALVILS-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSAIVANQQEVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIVGQPQV SHVFV-GNDARRMLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYMWNYDIKIYETHFRMGVYAVDNSWETLVYC 
-EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQYSTYYKE NRSVR-----------GNNTGGAELPK---------------------YHQQNILTD-ELYSDV---YPTAPKKYVIRV M-GVGDIGGYELICEDLFQILVCQ >t63 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDAL--YTFFTGQKGKRELIKY -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQD--------GDL--------------------CNS--PS FLGMCMFHNQVSFKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDEQDSSHLREKQLRL----YA---RIG K-V-VIIKVWNAVMGRLNHHEPPAAVMFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWMGIIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVG SRCRAELTALKTVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD-- -IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDDTPQK--MNIAPYWVCVCSGVVKKAQLD-SVEK RGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGVQNPVCKPWRFVIMNTQTFAANIIRLLYVKDV-SCNAVLQLLDW QLCYSH-EDWQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMFMKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSAIIANQQGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQV SHVFV-DNDARRMLIVGQSVMPPYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAIDNSWETLVYC -EMTSGRIGAKINHQPRLPEQVV-PPCVLTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQYSTYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQ M-GMGDIGGYELICEDLFQILVCE >t50 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKLMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY -YF-CMEGCGCVNTMAKNS-SQSAPWKFNHFMC-LMHLMQN--------GDL--------------------CNS--PS FLGMCMFHNQVSVKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSSHLREKQLRL----YA---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVMFPLRK--CGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHVLVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVG SRCRAELTALKTVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD-- 
-IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDDTPQK--MNIAPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGVQNPVCKPWRFVIMNTQT-AKNIIRLLYVKDV-SCNAVLQLLDW QLCYSH-GDWQSLIADACSTA-VNIAVCCWKRELMPGL---NHSVEHLAKSVYFKPDGE-------DEGQMIMKIFNLD WCEVEKSHEK-SSVDDNYN-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSAIMANQQGVKGMDWDTGKE-MQQGPK-IVMHLALFYAPSNVLAAEPNIIGQPQV SHVFV-DNDARRMLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQYSTYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQ M-GMGDIGGYELICEDLFQILVCE >t25 MIWSEYRSMVREG--SDQSIDNK-C------------LTAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY -YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PS FLGMCMFVNQVSFKGE--LVLPDNPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSSHLREKQLRL----YA---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWMGVIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRANTMSKPVMGDWVG SRCRAELTALKIVQLDVGSSGKTLGQ-NIMTPKKLGDRTWLAASPLQ------IGCDVMVQEKKVASPQ-EVEVAAD-- -IYFYRNMVIQRLTDVVAAVRMQSPQEMRLPVLVCIQDVDSPSLGDDTPQK--MNIAPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAVSEGEH---GPEQRPVFGMQNPVCKPWRFVIMNTQTFAKNIIRLLYVKDV-SCNAVLQLLDW QLCYSH-GDWQSLIADACPTA-VNIAVCCWKRELMPGL---NHSCEHLAKSVYFKPDGE-------DEGQMIMKIFNLD WCEVEKSHEK-SSVDDNYD-VNMLPFVQSNGDPVGHYVQGLED-ALLLIRPGT---ARALVILF-YGCQYYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSTIIANQQGVKGMDWDTGKE-MQQGPK-IVVHLTLFYAPSNVLAAEPNIFGQPQV SHVFV-DNDARRMLIVGQSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNSWEALVYC -EMTSGRTGAKINHQPRLPEQVV-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQYSTYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRQ M-GMGDIGGYELICEDLFQILVCE >t51 MIWSEYRSMVREG--SDQSIDNK-C------------LIAARVKMMSGVVRQVRNAPEDAL--YTFFTGQKGTRELIKY 
-YF-CMEGCGCVNTMAKNS-SQSAPWKFNRFMC-LMHLMQN--------GDL--------------------CNS--PS FLGMCMFHVQVQFKGE--LVLPDHPNENSVEFMYRRHHSLSDKRCLNLLYVGDNQDSQHLREKQLRI----YA---RIG K-V-VIIKVWNAVMGRLNHHEPPAEVVFPLRK--GGSDSKPF------------------IDGFF------------DR PGIANAMSVPCG----DQWVGVIRGWCSSQHQYGLANHILVHGPSKLHRTYAINKKMGANVYQQDSRMKAAAPLYPHIM EYAHGM----VKPFYGISEPKNEAQKNGEN-PMNVPCVESGECQYESKHAAMEKLMHQ-SLYLMRVNTMSKPIMGDWVG NRCRADLTALKTVQLDVGSSGKTLGQ-NIMTPKNLGDKTWLAASPLQ------IGCGIMVQEKKVASPQ-EVEVAAD-- -IYFYRNMVIQRLTDVVAAVRMQSPQEMRPPVLVCIQDVDSPSLGDDTPQM--MNIAPYWVCVCSGVVKKAQPD-SVDK RGKTWVAKS-DFLCPPAISEGEH---GAEQRPVFGMQNPVCRPWRFVIMNTQTFAKNVIRLLYVKDV-SCNKVLQLLDW QLCYSH-GDWQSLIADACPAA-VNIAVCCWKRELMPGL---NHSCEHLAKTVYFKPDGE-------DEGQMIMKIFGLD WCEVEKSHEE-SSVDDNYN-VNMLPFVSSNGDPVGHYVQGLED-ALRLIRPGT---ARALVILF-YGCQFYSGKFQDSE LFERSQLYR-VYCEI----KKDHKSAIIANQQGVKGMDWDTGKE-MQQGPK-IVVHLALFYAPSNVLAAEPNIIGQPQV SHVFV-DNDIRRMLIVGVSVMPAYKFMREACVPFFHKLFMAENQVRHDYVWNYDIKIYETHFRMGVYAVDNSWETLVYC -EMTSGRIGAKINHLPRLPEQVF-PPCILTVSGLKRTI-AGAPDETEITKTLIYHIV--MWADIMFKNCVMQYATYYKE NRSVR-----------VNNTGGAELPK---------------------YHQQNILTD-ELYSDM---YPAAPKKYVIRE M-GVGDIGGYELICEDLFQILVCE >t9 MIWSECRQMVREK--TDNAIDNQ-C------------LIAARVKENSGHVKQVRNDSEDIL--YNHFTGKKGARELIKR -YV-CMEGHDCVNTVANVS-SQSAIWKFDRFMCDLMHLMQN--------GDL--------------------CSY--PS FLPMCMFVPQVLSKGE--LVLPDSSNEESISFVYLQHHSFSVKRCLNYLNVGDSADSKHLKEKHLRA----EGMPGNIG K-H-VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDGFFDK-D-D------DR PGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDPSSLHRTFAINRKMGANVYQQTSRMKASAPFYPHCM EYAHGV----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYENKHATMTKLMHH-SLYLMRMHTMSKPIMGDVNG NRCRADLTALKMLQLDIGFSAKTLGQ-NVVVPHLLGHRTWLATSPLQ------IGCGVMIFGNKIGSAN-EFEAAADPT VIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDDDNQA--MVILPYWVCVCSGVVRKEHPDTSINK RGRTWLAKV-DFLCPPAFNQGEH---RAEQKPASKFEHPVCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLNW QLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMVLKIFGLD 
WCEMEKSHQR-SGVDDNYQ-VNMLPFNHSKNNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEVYSGEFQDSE S-ERSWIYN-VYCEI----KKDHKSAIIAHKYEHKGMDWDTGKE-AQQGPE-VVHHNLLFF-------AEPNIHGQPGV GHIFV-GNDARRKLIAGVSFMSMVKFMCEACVPFFRKLFMAVGQMRYDYVNNYDIKIWETVFRGGVYAIENSWETLVLC -EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGLIDGTEITKTPDIGIC--MWETIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAALPK---------------------YHHQNTLVD-ELYSDV---YPAAPKKK--VC M-GVGDVGGYEVMGEDLFQILVCE >t47 MIWSECRQMVREK--SDNAIDNQ-C------------QIAARVKRNSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR -YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSY--PS FLPMCMFVSQVLSKGE--LVLPDSSNEESISFVYLQHHSFSDKRCLNYLNVGDNSDSKHLKEKHLRA----QGMPGNIG K-H-VIINVWNAVMGRLNHHESPAEVVFPLRQ--GGPDSKPM------------------IDGFFDK-D-D------DR PGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHDPSNLHRTFAINRKMGANVYQQASRMKASAPFYHHCM EYAHGV----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESRDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGHVNG NRCRADLTALKMLQLDIGFSAKTLCQ-NIVVAHLLGHRTWLATSPLQ------IGCMVMIFGNKIGSAN-EFEAAADPT VIYFYRNRIVRRLADVISTVRMNSPQEMRE------QDVDSHSLGDDDNQA--MNILPYWVCVCSGVVRKEHPDTSINK RGRTWLAKM-DFLCPPAFNQGEH---RAEQKPALKFKHPVCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLKW QLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCDRLAKHIYFQPDGE-------DEGKMILKIFGLD WCEMEKSHQR-SGVDDNVQ-VNMLPFNHSKHNPVGHYVQGLED-ELNLIRPGT---ARALNILF-YGCEYYSGEFQDSE S-ERSWIYN-VYCEI----KKDHKSAIMACKYRHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGV GHIFV-GNDARRKLIAGLSFMAMMKFMCEACVPFFRKLFMAVGHMRYDYVSNYDIKIWETVFRGGVYAIENSWETLVLC -EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETK-AGIIDGTEITKTPEIGIC--MWDTIEFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNTLVD-ELYSDI---YPAAPKKK--VC M-GVGDVGGYEVMGEDLFQILVCE >t60 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR -YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSV--PS FLPMCMFVSQVLHKGE--LVLPDSSNEESISFVYLQHHSFSDKRCLNYLNVGDNADSKHLKEKHLRA----QGMPGNIG K-H-VIINVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------IDGFFDK-D-D------DR 
PGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHNPSNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCM EYAHGV----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYESKHAAMTKLMHQ-SLYLMRIHTMSKPIMGDVNG NRCRADLTALKMLQLDIGFSAKTLGQ-NIVVPHCLGHRTWLATSPLQ------IGCMVMIFGNKIGSEN-EFEAAADPT VIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDDDNQA--MNILPYVVCVCSGVVRKDHPDTSIHK RGRTWLAKV-DFLCPPAFNQGEH---RAEQKPAFKFKHPVCGPWEFVIVNYQTEAKDIILLLYIKDV-GCNCVLELLEW QLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFGLD WCEMEKSHQR-SGVDDNYQ-VNMLPFNHSKHNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEYYSGEFQDSE S-ERSWIYN-VYCEI----KKDHKSAIMAYKFEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIQGQPGV GHIFV-GNDARRKLIAGVSFMAVMKFMCEACVPFFRKLFMAVGQMRYDYMNNYDIKIWETFFRGGVYAIENSWETLVLC -EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIEGTEITKTPDIGIC--MWDTIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNTLVD-ELYSDI---YPAAPKKK--VC M-GVGEVGGYEVMGEDLFQILVCE >t30 MIWSECRQMVREK--TDNAIDNQ-C------------QIAARVKENSGHVKQVRNDSEDIL-CYHHFTGKKGARELIKR -YI-CMEGHDCVNTMANVS-SQSAVWNFDRFMCDLMHLMQN--------GDL--------------------CSV--PS FLPMCMFVSQVLHKGE--LVLPDSSNEESISFVYLQHHSFSEKRCLNYLNVGDNADSRHLKEKHLRA----QGMPGNIG K-H-VIISVWNAVMGRLNHHEPPAEVVFPLRQ--GGPDSKPM------------------MDGFFDK-D-D------DR PGIANAMSVPCG----EQWGGPTRGWCSSQQKFGLASHTLSHTPSNLHRTFAINRKMGANVYQQTSRMKASAPFYHHCM EYAHGV----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIDCQYENKHATMTKLMHQ-SLYLMRMHTMSKPIMGDVNG NRCRADLTALKMLQLDIGFSAKTLGQ-NIIVPHCLGHRTWLATSPLQ------IGCMVMIVGNKIGSEN-EFEAAADPT VIYFYRNRIVRRLADVISTVRMNSPQEMRN------QDVDSHSLGDDDNQA--MNILPYVVCVCSGVVRKEHPDTSISK RGRTWLAKI-DFLCPPAFNQGEH---RAEQKPAFKFKHPCCGPWEFVIVNYQTSAKDIILLLYIKDV-GCNCVLELLKW QLCTSH-GDWQSLVADSCVWA-HNVAVCEWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFGLD WCEMEKSHQR-SGVDDNYQ-VNMLPFKHSKHNPVGHYVQGLED-ALNLIRPGT---ARALNILF-YGCEYYSGEFQDSE S-ERSWIYN-VYCEI----KKDHKSAIMAYKYEHKGMDWDTGKE-AEQGPE-VVHHNLLFF-------AEPNIHGQPGV GHIFV-GNDARRHLIAGVSFMAVVKFMCEACVPFFRKLFMAVGQMRYDYVNNYDIKIWETVFRGGVYAIENSWETLVLC 
-EMTSGRCGAKMNHLPRLFEQVC-PPCILTVSGLKETR-AGIIDGTEITKTPDIGIC--MWDTIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHDQNTLVD-ELYSDV---YPAAPKKK--VC M-GVGDVGGYEVMGEDLFQILVCE >t70 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDIM--YNHFTGKKGARELIKR -YI-CMEGHDCVNTMANVS-SQSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PS FLPMCMFVAQVIAKGE--LVLPDSSTEESISFMYLQHHSFSDKRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIG Q-H-VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDGFFDK-D-D------DR PGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDPSKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVM EYAHGV----CKPFYEHSEPKNEAQGNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQDSLYLMRMHTMSKPLMGDVNG NRCRADLTSLKMLQLDIGFCAKTLGQ-NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIGSTQ-EFEAAADPT VIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDDDNQA--MNILPYWVCVCSGVVRKEHPDTSINK RGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV-GCNCVLELLAW QLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMILKIFGLD WCEVERSHQR-SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCQYCSGEFQDSE E-ERSWIYN-VYCEI----KKDHKSAILAHKYKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRV GHIFV-GNDAHRKLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAIENSWETLVLC -EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPEIGIC--MWDTIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HY M-GVGDVGGYEVMGEDLFQILVCE >t91 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKEDSGHVKQVRNASEDIM--YNHFTGKKGARELIKR -YI-CMEGHDCVNTMANVS-SQSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PS FLPMCMFVAQVIAKGE--LVLPDSSTEESISFMYLQHHSFSDKRCLNYLNVGDNEDSKHAKEKFLRG----DG---GIG Q-H-VIINVWNAVMGRLNHHEPPPEVVFPLRQ--GGPDSKPM------------------VDGFFDK-D-D------DR PGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLAFHTLSHDPSKLHRTFAINQKMGANVYQQMSRMKTPAPFYPHVM EYAHGV----CKPFYEHSEPKNEAQGNGEKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMHTMSKPLMGDVNG NRCRADLTSLKMLQLDIGFCAKTLGQ-NIVVPKLLGHRTWLATAPLQ------IGCGMMIFGNKIGSTQ-EFEAAADPT 
VIYFYRNRIVRRMADVISTVRMKSPQEMRG------QDVDSHSLGDDDNQA--MNILPYWVCVCSGVVRKEHPDTSINK RGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTVAKDIILLLYIKDV-GCNCVLELLAW QLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMILKIFGLD WCEVERSHQR-SGVDDNYK-VNMLPFSHSKHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCQYCSGEFQDSE E-ERSWIYN-VYCEI----KKDHKSAILAHKYKHKGMDWDTGKE-MEQGPK-VVNYNLLFY-------AEPNIHGQPRV GHIFV-GNDAHRKLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAIENSWETLVLC -EMTSGRSGAKMNHLPRLFEQVC-PPCLLTVSGLKETV-AGIIDGTEITKTPEIGIC--MWDTIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHNQNTLVD-ELYSDV---YPAAPKKQ--HY M-GVGDVGGYEVMGEDLFQILVCE >t49 MIWSECRQMVREK--NDNAIDNQ-C------------LIAARVKENSGHVKQVRNASEDIM--YNHFTGNKGACELIKG -YV-CMEGHDCVNTMANIS-SQSAVWKFDRFMCDLMHLMQD--------GDL--------------------CSY--PS FLPMCMFVAQVIAKGE--LVLPDSSNEESISFMYLQHHSFSDKRCLNCLNVGDNEDSKHLKEKFLRA----DG---GIG Q-H-VIINMWNAVMGRLNHHEPPPEVVYPLRQ--GGPNSKPM------------------VDGFFDK-D-D------DR PGIANAMNVPCG----DQWGGPIRGWCSSQHRFGLAFHTLKHDPSKLHRTFAINEKMGANVYQQNSRMKASAPFYPHVM EYAHGV----CKPFYEHSEPKNDAQGNGDKVPMNVPCVESMDCQYENKHAPMAKLMHQ-SLYLMRMYTMSKPIMGDVNG NRCRAELTSLKMLQLDIGFSAKTLGQ-NIVPPKLLGHRTWLATSPLQ------IGCGVMIFGNKIGSTQ-EFESAADPT VIYFYRNRIVKRMADVISTVRMKSPQEMRG------QDVDSHSLGDDDNQA--MNILPYWVCVCSGVVRKEHPDTSINK RGRTWLAKI-DFLFPPAFNQGEH---HAEQKPVFGFKHPHCGPWEFVICNYQTMAKDIILLLYIKDV-GCNCVLELLAW QLCTSH-GDWQSLVADSCIWA-HNVAVCAWKRELVPGL---NHSCEHLAKHIYFQPDGE-------DEGKMIIKIFGLD WCEVEKSHQR-SGVDDNYK-VNMLPFSHSNHNPVGHYVQGLGD-ALRLIRPGT---ARALNILF-YGCEYYSGEFQDSE E-ERSWIYN-VYCEI----KKDHKSAIMAHKYEHKGMDWDTGKE-MEQGPK-VVHHNILFY-------AEPNIHGQPRV GHIFV-GNDAVRNLIAGVSFMSIMKFMCEACVPFFRKLFMAVRQMRYMYMNNYDIKIWETAFRGGVYAMENSWETLVLC -EMTSGRVGARMNHLPRLFEQVC-PPCILTVSGLKETV-AGVIDGTEITKTPEIGIC--MWDTIHFKNPVMQYCCYYKE PRSVR-----------VNNTGGAELPK---------------------YHNFNALVD-ELYSDV---YPAAPKKK--HY M-GVGDVGGYEVMGEDLFQILVCE >t52 MIWSEERQMVREK--VDNAVDNQ-C------------LIAARVKENCGHVKQVRNASEDIL--YNHFTGKKGARELIKR 
-YI-CMEGHDCVNTMAHDS-SQSATWKFDRFMCDLMHLMQG--------GDL--------------------CSY--PS FLPVCMFVAQVINKGE--LVLPDSSNEESISFMYLQHHSFSEKRCLNYLNVGDNEDSKHLKEKFLRA----DG---NIG Q-H-VIINMWNAVMGRLNHHEPPAEVNFPLRQ--GGPDSKPM------------------VDGFFDK-D-D------DS PGIANAMNVPCG----DQWGGPIRGWCSSQHKFGLASHTLSHDPSKLHRTFGINKNMGANVYQQTSRMKANAPFYPHVM EYAHGV----CKPFYEHSEPKNEAQGNGEK-PMNVPCVESIECQYENKHATMAKLMHD-SLYLMRMHTMSKPVMGDVNG HRCRADLTALKMLQLDIGFSAKTLGQ-NIVAPKLLGVRTWLATSPLQ------IGCGVMIFGNKIGSTQ-EFEAAADPT VIYFYRNRIIRRVADVISTVRMKSPQEMRE------QDVDSHSLGDDDNQA--MTILPHWVCVVSGVVRKEHPDTSINK RGSTWLAKV-DFLCPPAFNQGEH---HAEQKPVFGFKHPHVGPWEFVICNYQTSAHDIILLLYIKDV-GCNCVLELLVW QLCTSH-GDWQSLVADSCVWA-HNVAVCAWKRELVPGL---NHSCERLAKHIYFQPDGE-------DEGKMILKIFELN WCEVEKSHQH-SGVDDNYQ-VNMLPFDHCKHNPVGHYVQGLED-ALGLIRPGT---ARALNILF-YGCQYYSGEFQDSE A-ERSWIYN-VYCEI----KKDHKSGIMAHKYDHKGVDWDTGKE-MEQGPK-VLHHNLLFY-------AEPNIHGQPTV GHIFV-GNDARRKLIAGVSFMSVVKFMCEACVPFFRKLFMAFRQMRYDYVNNYDIKIWETVFRGGVYAIENSWETLILC -EMTSGRCGAKMNHMPRLFEQVC-PPCILTVSGLKETM-AGVIDGTEITKTPDIGIG--MWDSIHFKNPVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNSLVD-ELYSDV---YPAAPKKK--HY M-GVGDVGGYEVMGEDLFQILVCE >t43 MIWSEVRQMVREG--SDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQK--------GDL--------------------CSH--PS FLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIG H-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSK-M------------------VDGFFDK-E-D------DR PGIANAVSVPCA----DQVGGPIRGWCSSQVNFGLANHTLVQ-PSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKVVQLDIGFSSKTLGQLNIIDCKLLGHRTWLATSPLQ------IGVDVMVMGNKIGSPS-EFEVAADPT IIWFYRNCIVHKLADTVSTAKMKSPQEMRH------QDVDSPSLGDDDQQA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFNQGEH---EREQRPVYGFKHPGCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNW HLCDSH-GDWQSLVADSCCWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELD 
WVEMEKSHQQ-SSVDDNYL-VNMLPFLHSRQNPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGE A-ERSWIYD-VYCEM----KKDHKSAVIAHKHEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYNVNYDIKIWETHIRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCVLTVSGLKQPM-AGYNDQTEITKTPDICIC--TWGTIHFKNFVMQYCVYYKE NRSVR-----------VNNTGGAELPQ---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t1 MIWSEVRQMMREG--TDNAIDNQ-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCCLMHLMQN--------GDL--------------------CSH--PS FLGMCMFVEQVRCKGE--LVLPDSPNEESLSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIG R-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPV------------------VDGFFDK-E-D------DR PGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAKLTALKIVQLDIGFSAKTLGQLNIIDCKLLGRHTWLATSPLQ------IGCDMMVMGNKIGSPS-EFEVAADPT IIWFYRDCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDDDQQE--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFNQGEH---EAEQRPVYGFVHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFMQSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGE A-ERSWIYD-VYCEM----KKDHKSAVVAHKVEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLITGVSVMPTYKFMCGACVQFFHKLFMAVRNMRYDYTVNYDIKIWETHIRQGVYAVENSWETLITC -EMTSGRIGAKINHLPRLPEQVV-PPCILTVSGLKQPM-AGYNDETQITKTPDICIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t86 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKPVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQN--------GDL--------------------CSH--PS FLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRI----YG---QIG R-H-VIINVWNAFMGRLNHHEPPAEAVYPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR 
PGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKMGANVYHQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPS-EFEVAADPT IIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDDDQQA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFNQGEH---GAEQRPVYGFQHPRCRPWQFVIANYQTFAKNIIMLLYVKDV-SCNRVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFIHSRDNPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGE A-ERSWIYP-VYCEM----KKDHKSAVVAHKAEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLITGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHIRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETQITKTPDICIC--GVGTIHFKNLVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t72 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHDCVNTMAKVS-SQSAVWKFDRFMCALMHLMQN--------GDL--------------------CSH--PS FLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNCLNAGDNDDSEHLKEKHLRT----YG---QIG R-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKPM------------------VDGFFDK-E-D------HR PGIANAVSVPCA----DQVGGPLRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKIGANVYQQTSRMKADAPVYPHIM EYAHGV----YKPFYERSEPKNEAQGNGEK-QGNVPCVESVECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPS-EFEVAADPT IIWFYRNCIVHKLADTVSTAKMKAPQEMRH------QDVDSPSLGDDDQQA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFKQGEH---EAEQRPVYGFQHPRCRPWQFVIANYQSSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGQMVLKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFIHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDGE A-ERSWIYD-VYCEM----KKDHKSAVVAHKVEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLITGFSIMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHIRHGVYAVENSWETLVTC 
-EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPV-AGYNDETQITKTPDICIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t96 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHVQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCALMHLMQV--------GDL--------------------CSH--PS FLGMCMFHEQVRRKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNCLNVGDNDDSEHLKEKHLRV----YG---QIG R-H-VIINVWNAFMGRLNHHEPPAEVVYPLRK--GGPDSKLM------------------VDGFFDK-E-D------ER PGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVQ-PSKLHRTFKINKKIGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMECQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVIGNKIGSPS-EFEIAADPT IIWFYRNCIVHKLADTVSTAKMKAPQEMRV------QDVDSPSLGDDDQQA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFNQGEH---EAEQRPVYGFQHPRCRPWQFVIGNYQTSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCMWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFIRSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDAE A-ERSWIYD-VYCEM----KKDHKSAVYAHKVEHKGMDWDTGKE-MNQGPK-IVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLISGVSVMPTYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDVKIWETHIRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNMETQITKTPDICIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDV---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHG >t44 MIWSEVRQMVREG--TDNAIDTR-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFSGQKGAREKIKR -YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSH--PS FLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHLKEKHLRI----YG---VIG K-H-VIINVWNAFMGRLNHHEPPADVEYPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR PGIANAVSVPCG----DQVGGPIRGWCSSQVKFGLANHTLAQNPSKLHRTFKINKKMGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYDRSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGYVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDIMVMGNKIGSPS-EFEVAADPT 
IIWFYRNCIVHKLADVVSTAKMKSPQEMRV------QDVDSPSLGDDDQQA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKN-DFLCPPAFEQGEH---EAEQCPVHGFKHPRCRPWQFVIADYQTSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVLKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFMHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCEYYSGQFPDCE A-ERSWIYD-VYCEM----KKDHKSAVVAHKHEHKGMDWDTGKE-MNQGPK-VVMHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLITGVSVMPTYKFMCNACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHMRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGYNDETEITKTPDICIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNMLVD-ELYSDM---YPAAPKKK--VY V-GAGDVGGYEVMSEDLFQILVHA >t45 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YK-CMEGHDCVNTMAKSS-SQSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSV--PS FLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHLKEKHLRI----YG---EIG K-H-VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDGFFDK-E-D------DR PGIANAVSVPCT----DQVGGPIRGWCSSQVKFGLANHTLVHNPSKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPA-EFEVAADPT IIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDDDQEA--MDILPYWVCVMSGVVRKEQPD-SVNK RGRTWLAKK-DFLVPPAFNQGEH---EAEQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVHKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCQYYSGQFPDGE A-ERSWIYD-VYCEM----KKDHKSAVVAHKHQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHLRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPDICIC--TWGTIHFKNSVVQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t74 MIWSEVRQMVREG--TDNAIDNC-C------------LIAARVKHMQGHVKQVRNANEDAL--YQNFTGQKGAREKIKR 
-YL-CMEGHDCVNTMAKPS-SQSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSV--PS FLGMCMFHEQVRCKGE--LVLPDSPNEESVSFVYIQHHSFSDKRCLNYLNVGDNDDSEHIKEKHLRI----YG---EIG K-H-VIINVWNAFMGRLNHHEPPAEVMYPLRK--GGPESKPM------------------VDGFFDK-E-D------DR PGIANAVSVPCA----DQVGGPIRGWCSSQVKFGLANHTLVHNPSKLHRTFAINKKMGANVYQQTSRMKAEAPVYPHIM EYAHGV----CKPFYERSEPKNEAQGNGEK-QGNVPCVESMDCQYENKHAIMAKLMHP-PLYLMRMETMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSAKTLGQLNIIDCKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPA-GFEVAADPT IIWFYRNCIVHKLADLVSTAKMKSPQEMRV------QDVDSPSLGDDDQEA--MDILPYWVCVTSGVVRKEQPD-SVNK RGRTWLAKK-DFLVPPAFNQGEH---EAEQKPVYGFDHPRCRPWQFVIANYQTSAKNIIMLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCVWA-HNVAVCGWKRELVPGL---NHSCEMLAKTVYFEPDGE-------DEGKMVHKIFELD WVEMEKSHQQ-SSVDDNYL-VNMLPFFHSRENPVGHYVQGLED-PMHLIRPGT---ARKLNILF-YGCQYYSGQFPDGE A-ERSWIYD-VYCEM----KKDHKSAVVAHKHQHKGMDWDTGKE-MNQGPK-VVLHGVLFN-------AEPNIHGQPGV SHVFM-GNDATRKLIPGVSVMHDYKFMCGACVPFFHKLFMAVRNMRYDYTVNYDIKIWETHLRHGVYAVENSWETLVTC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQPM-AGFVDETEITKTPDICIC--TWGTIHFKNSVVQYCCYYKE NRSVR-----------VNNTGGAELPK---------------------YHHQNILVD-ELYSDM---YPAAPKKK--HY V-GAGDVGGYEVMSEDLFQILVHE >t26 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDAL--YQNFTGKKGAREKIKR -YL-CMEGHECVVTMAKNS-SISAMWKFDRFMCDLMHLMQN--------GDL--------------------CSF--PS FLGMCMFHDQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSDKRCLNYLNVGDNEDSEHLKEKHLRV----YG---NIG K-H-VIINSWNAMMGRLNHHEPAAEVVYPLRK--GGPESKPM------------------VDGFFDK-E-D------DR PGIANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLIHIPSKLHRTFAINKKMGANVYQQTSRMKAAAPVYSHVM EYAHKV----VKPFYARSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLMRMQTMAKPPMGDVDG HRCRAGLTALKIVQLDIGFSQKTLGQ-NIIDSKHLGHRTWLATSPLQ------IGCSAMVMGNKIGSPQ-EVELAADPT VIYFYRNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDDDQQP--MNIIPYWVCVTSGVVHKEQPD-SVNK RGRTWTAKN-DFLCPDAFNQGEH---GAEQKPTHGFEHPRCRPWQFVIPVYQTGAKNIILLLYIKDV-SVNGVLQLLIW QLCDSH-GDWQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLD 
WCEIEKSHQQ-SSVDDNYL-VNMLPFTHSRETPVGHYVQGLED-PMHLMRPGT---ARALNILF-YGCEYYSGQFPDGE P-ERSWIYD-VYCEL----KKDHKNAIVAVKHEHKGMDWDTGKE-MNHGPK-VIVHGVLFH-------AEPNIGGQPGV SHVFV-GNDATRKLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYASNYDIKIVETHLREGVYAVENSWETLVVC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQSV-SGYNDETEITKTPDIKIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------LNNTGGAELPK---------------------YHHQNVLVD-ELYGEA---YPAAPKKK--HY M-GAGDVGGYEVMSEDLFQILVHE >t61 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHVKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHECVNTMAKNS-SVSAVWKFDQFMCDLMHLMQN--------GDL--------------------CSF--PS FLGMCMFHEQVRCKGE--LVLSDSKNEESVSFVYIQHPSFSDKRCLNYLNVGDNEDSEHLKEKVLRV----YG---DIG K-H-VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR PGVANAVSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHIPSKLHRTFAITKKMGANVYQQTSRMKATAPVYPHVM EYAHGV----VKPFYDRSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDG HRCRAGLTALKIVQLDIGFSSKTLGQ-NIIHSKLLGHRTWLATSPLQ------IGCDVMVMGNKIGSPQ-EVELAADPS VIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDDDQQP--MNIIPYWVCVTSGVVRKEQPD-SVNK RGHTWTAKN-DFLCPPAYNQGEH---GAEQKPVHGFEHPRCRPWQFVISVYRTGAKNINLLLYIKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCAWA-VNVAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEGKMVLKIFGLD WCEIEKSHQQ-SSVDDNYL-VNMLPFAHSRQTPVGHYVQGLED-PMHLMRPGT---ARALNILF-YGCEYYSGHFPDGE A-ERSWIYD-MYCEL----KKDHKSAIVAVKHCHKGMDWDTGKE-MNHGPK-VIIHGLLFH-------AEPNIGGQPGV SHVFV-GNDATRKLIAGASVMFIVKFMCGACVPFFRKLFMALRNMRYDYSQNYDIKIVETHLREGVYAIENSWETLVVC -EMTSGRMGAKINHLPRLPEQVI-PPCILTVSGLKRAV-NGYNDETEITKTPDIKIC--TWGTIHFKNSVMQYCCYYKE NRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSEL---YPAAPKKA--HY M-GAGDIGGYEVMSEDLFQILVHE >t97 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHMSGHMKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGHECVNTMAKNS-SVSAVWKFDRFMCDLMHLMQN--------GDL--------------------CSF--PS FLGMCMFHQQVRCKGE--LVLPDSKNEESVSFVYIQHHSFNDKRCLNYLNVGDNEDSEHLEEKHLRV----YG---NIG K-H-VIINVWNAMMGRLNHHEPAAEVVCPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR 
PGVANAMSVPCD----DQWGGPVRGWCSSQHKFGLADHTLVHVPSKLHRTFPINKKMGANVYQQTSRMKATAPVYPHVM EYAHGV----IKPFYDRSEPKNEAQGNGEK-QWNVPCVESMDCQYENKHATMAKLMHP-PLYLVKMQTMSKPPMGDVDG HRCRAGLTALQIVQLDIGFSAKTLGQ-NIIHSKLLGHRTWLATSPLQ------IGCDVVVMGNKIGSPQ-EVELAADPS VIYFYQNCIVQKMADVVSTVKMKSPQEMRV------QDVDSASLGDDDQQP--MNIIPYWVCVTSGVVRKEQPD-SVNK RGRTWSAKN-DFLCPPAFNQGEH---GAEQKPVHEFQHPRCRPWQFVISVYRTGAKNINLLLYIKDV-SCNGVLQLLNW QLCDSH-GDWQSLVADSCAWA-VNLAVCGWKRELVPGL---NHACEMLAKSVYFEPDGE-------DEGKMNLKIFGLD WCEIEKSHQQ-SSVDDNYL-VNMLPFTHSRETPVGHYVQDLED-PMHLMRPGT---ARALNILF-YGCEYYSGHFPDGE P-ERSWIYD-MYCEL----KKDHKSAIVAVKHCHKGMDWDTGKE-MNHGPK-VIIHGVLFH-------AEPNIGGQPGV SHVFV-GNDATRKLIAGVSVMPIVKFMCGACVPFFRKLFMALRNMRYDYSQNYDIKIVETHLREGVYAVENSWETLVVC -EMTSGRIGAKINHLPRLPEQVI-PPCILTVSGLKQAV-NDCNDETEITKTPDIKIC--TWPTIHFKNGMMQYCCYYKE NRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSEV---YPAAPKKA--FY M-GAGDIGGYEVMSEDLFQILVHK >t17 MIWSEVRQMVREG--ADNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGQDCVNTMAKSS-SQSAVWKFDRFMCDLVHLMQN---GARGSGDL--------------------CSF--PS FLGMCMFHEQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSDKRCLNELNVGDNEDSIHLKEKHLRI----YG---NIG K-V-VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR PGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVPSLLHRTFAINRKMGANVYQQTSRMKAAAPVYPHMM EYAHGV----VKPFYERSEPKNEAQGNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDG VRCRAGLTALKIVQLDIGFSAKTLGQ-NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPQ-EFELAADPT VIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDDNQQS--MNIIPYWVCVHSGVVQKEQPD-SVHK RGRTWTAKN-DFLCPPAFDQGEH---GAEQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLD WCEMEKAHQQ-SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIRPGT---ARALNILF-YGCEYYSGQFPDGE P-ERSWIYD-VYCEL----KKDHKSAIVAVKHEHKGMDWDTGKE-MNQGPK-VIIHGVLFH-------AEVNIHGQPGV SHVFV-GNDATRKLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSNYDIKIIETHLRNGVYAVENSWETLVVC 
-EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPDIKIC--TWGTIHFKNFVMQYCVYYKE NRSVR-----------FNNTGGAELPK---------------------YHHQNALVD-ELYSDV---YPAAPKKK--HY M-GEGDVGGYEVMSEDLFQILVHA >t11 MIWSEVRQMVREG--TDNAIDNR-C------------LIAARVKHFSGHTKQVRNANEDAL--YQNFTGQKGAREKIKR -YL-CMEGQDCVNTMAKSS-SQSAVWKFDRFMCDLVHLMQN---GARGSGDL--------------------CSF--PS FLGMCMFHEQVRCKGE--LVLPDSKNEESVSFVYIQHHSFSDKRCLNSLNVGDNEDSIHLKEKHLRI----YG---DIG K-H-VIINVWNAMMGRLNHHEPAAEVVYPLRK--GGPDSKPM------------------VDGFFDK-E-D------DR PGIANAVAVPCS----DQWGGPVRGWCSSQHKFGLADHTLVHVPSLLHRTFAINKKMGANVYQQTSRMKAAAPVYPHMM EYAHGV----VKPFYERSEPKNEAQGNGEK-QWNVPCVQSVDCQYENKHATMAKLMHP-PLYLMRMETMSKPPMGDVDG VRCRAGLTALKIVQLDIGFSAKTLGQ-NIIDSKLLGHRTWLATSPLQ------IGCDVMVVGNKIGSPQ-EFELAADPT VIYFYRNCIVQKLADVVSTVKMKSPQEMRV------QDVDSPSLGDDNQQS--MNIIPYWVCVHSGVVQKEQPD-SVHK RGRTWTAKN-DFLCPPAFDQGEH---GAEQKPVHGFEHPRCRPWQFVIANYQTGAKNIILLLYVKDV-SCNGVLQLLNW QLCDSH-GDWQSLSADSCAWA-HNVAVCGWKRELVPGL---NHACEMLAKTVYFEPDGE-------DEGKMVIKIFGLD WCEMEKAHQQ-SSVDDNYL-VNMLPFTHSRENPVGHYVQGLED-PMHLIRPGT---ARALNILF-YGCEYYSGQFPDGE P-ERSWIYD-VYCEL----KKDHKSAIVAVKHEHKGMDWDTGKE-MNQGPK-VIIHGILFH-------AEVNIHGQPGV SHVFV-GNDATRKLIAGVSVMPFVKFMCGACVPFFRKLFMALRNMRYDYTSNYDIKIMETHLRNGVYAVENSWETLVVC -EMTSGRIGAKINHLPRLPEQVI-PPCMLTVSGLKQAM-AGVNDETEITKTPDIKIC--TWGTIHFKNFVMQYCVYYKE NRSVR-----------FNNTGGAELPK---------------------YHHQNVLVD-ELYSDG---YPAAPKKK--HY M-GEGDVGGYEVMSEDLFQILVHA >t23 MIWSECQRMLREH--EDQAIANE-C------------LIAFKVKLVNGHVKQVRNASEDAH--YTFFAGEKGARELIKR -YS-CMEGDGCVNTMAEGR-SESAVWKFDPFMCQLTHSMQS--------GDL--------------------CSV--PS FLGVCMFQRQVVMKGE--LVLPDSADEGSVSFMYIQHHSFAAKRCLNVLNVGDNEDSVHLRHKQLRV----YG---KIG K-H-VIISVPNAIMGRLNHHEPSAIVVFPLRQ--RGADSKSV------------------GDGFYDK-D-E------DR PGIANAVSVPVA----EQWVGSVRGWYSSEHKYGLANHILI-GPSKLHRTYQTTAKMGANVVKATNRMKRPQPVYPHVM EYANGV----VKPFYEVAESKNEAQGNGEK-PVNVPCVESPDCQYESKHARVNKLMHP-SLYLMCMEAMNKPIMGDWDG NRCRSPLCLLKVIQLDMGVSGKTLGQ-NIVMAKLLGERTWLATSPLQ------IGCDVVAVGKKPESPQ-EFECAADPT 
VIYFYKNLIIQQAADYVSAVQVKSPQEMRY------QDVNSPSDGDENGQS--MHIGPYWVCVSSEVVKKSQPD-SVDK RGRTWVAKN-EFLCPPDHMQGEHSIEGAEQKPFFGFAGPFPQPWQFVIVNPQTWAHNIIRLLYCKDV-SCNCVLTCLNW VLCDSH-GDWHSLIADACPCA-HNVAVCGWKRELVPGL---NHSNEHMAKTIYFEPDGH-------DEGKMILNIFGLD WCEVETSHQE-SSSDDNHL-VNMLPFGVSRDDPVGHYMLGLED-AIRLYRPGT---ARALNILF---VNYYSGDFQDPE L-ERSQLYN-VYCEQ----KQDHRNAIRASKNDHKGMAWDTAKD-MEQGPK-MVEHQVLFY-------AEPNMHGQPEV IHIFI-GNDAMCMLIPGMSVMVHHKFMRAACMPFFNKLFMAVRYMRYDYVDNYDIKIDETRYRDGVYARRNSWETANVC -EMSSGRSGAKINHNPRLPEQVT-PPRSLTVSGLKDTM-PGHPDVTEITKTPVIAIV--MWGTINLKNHVMQYCCYYKE TRSVR-----------VNNTGGAELPK---------------------YHYQNILVD-ELYSND---YPAAPKKA--QF M-GVGDVGGYEIVCEDIFQILVCD >t76 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN -HD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG--------GDMPQNTGDSMTCPHLAEGLAAVNS---RA FIGVSIYDLKVSHKMI--QACNPIQN---------DNHAFSDQRCLNDLSQGVVGASFPKMQHGVRH----YK---K-- ------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPARNFIQLS-MVRDTPHRL PKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELQATMVFVAARMKTDSSMYMSVR EILHG------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRC LRYKNDLTGDQLVQLAD----NTCAW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPK CAQFYSKSCTHC----------------TM------CSVGSHASEEDALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK -----------------------------P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQV PLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELG W-NLKH-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDS I-RRDLCNQ-GCHEENMVWRDDLKDPISTEVIADKLHEWPTNVN-KENSAD----HRQLFH-------ANSSALDKHQH NVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVMCNVKCKEFLQRE DEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPYTDAYKK PKSMHQFQFDDCQIRYRTNTGFEETPVGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AH V-PMGDEGRISRAKDEIMYIRDLE >t53 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN 
-YD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG--------GDIPQNTGDSMTCPHLAEGLAAVNS---RA FIGVSTYDLKVSHKMI--QACNPIQN---------DNHAFSDQRCLNDLAQGVVGASHPKMQHGVRH----YK---K-- ------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPARNFIQLS-MVRDTPHGL PKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVR EILHG------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRC LRYKNDLAGDQLVQLAD----NTCAW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPK CAQFYSKSCTHC----------------TM------CSVGSHASEEDALDLYTHRPVPDAQCFVSRVARNIPEH-SPCK -----------------------------P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQV PLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELG W-NLKP-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDS I-RRDLCNQ-GCHEENMVWRDDLKDPISTEVIADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQH NVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVVCNVKCKEFLQRE DEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPYTDAYKK PKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDHNASEQKE--AH V-PMGDQGRISRAKDEIMYIRDLE >t16 LSSSHEVRVVNHKSKPDEHIADK-D------------MCADAMCNGSGHTGRIHNAVEGGI--VTNFHGF---AEMPKN -YD-VMEEQGGPGTCLAPQQSADTSWPFVMFTVDLAHRCRG--------GDIPQNTGDSMTCPHLAEGLAAVNS---RA FIGVSTYDLKVSHKMI--QACNPIQN---------DNHAFSDQRCLNDLAQGVVGASHPKMQHGVRH----YK---K-- ------RVYENEVGGRLDDAEAIDDMGVWIRW--EGVKHAWCHPIGSCPDHLVCSLLVLQPARNFIQLS-MVRDTPHGL PKMASAAPVYKQTCHLEQVQGTARMWGKSGPRVVLNGHMLK-TQNTLHRQYLVDVELGATMVFVAARMKTDLSMYMSVR EILHG------------CGIKDEAQADGQR-PATLIVVLSKDNKGITEHGAVLKKIHP----------LCKRCMENGRC LRYKNDLAGDQLVQLAD----NTCLW-NLFEC--LRDSKSLFGSPLFIKVDEDRGFTAP-------SK-------VEPK CAQFYSKSCTHC----------------TM------CSVGSHASEEDALDLYTHKPVPDAQCFVSRVARNIPEH-SPCK -----------------------------P---CS-----------------VEEVCYCKVCDSKDVRAVKNAYQDLQV PLSKLK-APWLSMGHCECWEEDINNILSVVKHELVDDVDRMNRLTEVAAKMAYFGPDGFHWDVELWEENDLNCDDFELG 
W-NLKP-------EDDHPL-LCIGSFSVHKYVSVMVYPLPMND-CVRMSQPCHAAHAQDIPTEQ---TRYQIHTFLDDS I-RRDLCNQ-GCHEENMVWRDDLKDPISTEVIADKLHEWPTNQN-KENSAD----HRQLFH-------ANSSALDKHQH NVVMN-GSPCIRALIIGGSSVGVNHFMMGPCQEFFTDLFMMYEGLQYACVCAMNITIQCLHTGEGVVCNVKCKEFLQRE DEMKAGLIGIICNHLSRMIMVIL-VHCLLTHAGLKSME-IGFDR-TGVVRMPLIAEP--LYLVI-YVNAVEPYTDAYKK PKSMHQFQFDDCQIRYRTNTGFEETPMGATHLTHVCVCVPHPWTKGKSEILQNMGSA-VLYNDVIRDYNASEQKE--AH V-PMGDQGRISRAKDEIMYIRDLE >t42 LSSSHEMRVVHHKSKPDEEIADQ-D------------LCAEEMCKGSGHTGRIHSAVEGGI--WTNFHGF---AEMPKN -YD-VMEEQGGPGTCIAPRQSADTSWPFMVFTVDLAHRCRG--------GDTPTQAGDSMTFPHLAEGLPAVNS---RA FVGVSSYDLRVSHKMI--QACTLIQN---------DNVAFSDQRPLNDLNQGCVGTSHPKMPHGVRH----YK---K-- ------KVYENEVGDRLDDAEGIDDVGVWMRW--NGVKHAWCHIIGSCPDHLVCSLLVLQPGRNFIQLS-VVHDTPHRL PKMASAAAVFKQTCHLEQVPGTARMWGKSGPRVRLNGHVLK-NQNILHRQYLVDVNLGATMVFVLARMKTDASMYMSHR EILHGGTFDAAKPFRQICGTKDEAQPDGRR-PATLLVVLSKDNQGITEHGAVLKHVHP----------LCKKDCNNNRC LRCKNVLAGNQLIQLSD----VTCAW-NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP-------SK-------VEPK CAQFYSKSCTHH----------------MM------CSVGSNASEEDALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK -----------------------------P---CS-----------------VEEVAYCKNCDSKDVRAVQNAYQDLAV PLSKLK-APWLSMGHCECWEEDINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDVELCEESDLTVDDFELG W-VLKP-------EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQPCHAAHAQDIHTDQ---DRYDVRTFLGHS I-KCHMCNQ-ACHEENIVWRDDLKDPITTEVIAEKLQEWPTNQN-KENVAD----HRQLFH-------ANSSALDKHRH NVMMN-GSACIRTLIIGGSIVGVNVFMMEPCQEFFTDLFMVYEGLQYAVVCAVNIIIQCLHTNEGVVCNVKCKEFLQRE EDMKSGLIGIICNHISRMCMHIL-VVCLLTHCGLKAME-IPFDI-TGVVRGPLITEP--LYLII-YVNAVEPYTDAYKK PKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVHVCVVVPHPWTKGKSEILQNMESA-HLYNAVIRDNNASEQKE--AH V-PVGDEGRISRAKNEILYIRDLE >t24 QSSSHEMRVVHHKSKPDEEITDQ-D------------LCAEEMCKGSGHTGRIHSAVEGGI--WTNFHGF---AEMPKN -YD-VMEEQGGPGTCVAPRQSADTNWPFMVFPVELAHRCRG--------GDTP--------FPHLAEGLPAVNS---RA FIGVSSYDLKVSHKMI--QACNPIQN---------DNVAFSDQRVLNDLSQGCVGTSHPKMPHGVRH----YK---K-- ------RVYENEVGDRLDDAEAVDDVGVWMRW--AGVKVAVCHVIGSCPDHLVVSLLVLQPARNFIQLS-VVHDTPHRL 
PKMASAAAVFKQTCHLEQVAGTARVWGKSGPRVKLNGHVLK-NQNVLHRQYLVDVDLGATMVFVAARMKTDASMYMSHR EILHGGTFDAAKPFRQICGVKDEAQPDGQR-PATLLVVLSKDNVGITEHGAVLKHVHP----------LCKKDCNNNRC LRCKNVLAGNQLVQLAD----ITCAW-NLFEC--LGDSESLFGSPLAIKVDEDRGFTAP-------SK-------VEPK CQQFYSKSCTHC----------------VM------CSVGSNASEEDALELYTHKPVPDPQCFVSIVVRNIPEH-SPCK -----------------------------P---CS-----------------VEEVCYCKNCDSKDVRATQNAYQELAI PLSKLK-APWLSMGHCECWEEGINNILSMVKHELVVDQDMVNRSPEVAAKMAYFGPDGFHWDIELCEENDLTCDDFELG W-VLKP-------EDDHPL-ICIGSFSVHKQFSVMVYPLPMND-AIRMSQPCYAAHAQDIHTDQ---DRYDIRTFLEHS I-RCHLCNQ-ACHEENIVWRDDLKDPITTEVIADKLQEWPTNQN-AENVAD----HRQLFH-------ANSSCLDRHRH NVMMN-GSSCIRTLIIGGSIVGVNVFMMGPCQEFFTDLFMVYEGLQYAVVCAINIIIQVLHTNEGVVCNVKCKEFLQRE DDMKSGLIGIICNHISRMCMHIL-VHCLLTHCGLKAME-IPFDR-TGVVRGPLIAEP--LYLII-YVNAVEPYTDAYKK PKSMHQFQFDDVQIRYRTNTGFEENPKLATHLVHVCVVVPHPWTKGKSEILQNMESARHLYNAVIRDNNASHQKE--AH V-PAGDQGRISRAKNEILYIRDLE bpp-seq-2.1.0/test/test_alphabets.cpp000644 000000 000000 00000005646 12147656566 017561 0ustar00rootroot000000 000000 // // File: test_alphabets.cpp // Created by: Julien Dutheil // Created on: Sat Oct 30 17:10 2010 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include #include #include #include #include #include #include #include #include #include using namespace bpp; using namespace std; int main() { //This is a very simple test that instanciate all alpahabet classes. NucleicAlphabet* dna = new DNA(); NucleicAlphabet* rna = new RNA(); Alphabet* pro = new ProteicAlphabet; Alphabet* def = new DefaultAlphabet; Alphabet* stdCdn = new StandardCodonAlphabet(rna); Alphabet* vmtCdn = new VertebrateMitochondrialCodonAlphabet(rna); Alphabet* imtCdn = new InvertebrateMitochondrialCodonAlphabet(rna); Alphabet* emtCdn = new EchinodermMitochondrialCodonAlphabet(rna); Alphabet* ymtCdn = new YeastMitochondrialCodonAlphabet(rna); delete dna; delete rna; delete pro; delete def; delete stdCdn; delete vmtCdn; delete imtCdn; delete emtCdn; delete ymtCdn; return (0); } bpp-seq-2.1.0/test/test_walker.cpp000644 000000 000000 00000010027 12147656566 017070 0ustar00rootroot000000 000000 // // File: test_walker.cpp // Created by: Julien Dutheil // Created on: Thu Nov 24 14:42 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. 
This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include #include #include #include #include #include using namespace bpp; using namespace std; bool testSeq(SequenceWalker& walker, unsigned int pos, unsigned int truth) { cout << 0 << endl; cout << walker.getSequencePosition(0) << endl; cout << 46 << endl; cout << walker.getSequencePosition(46) << endl;; for (unsigned int i = 0; i < 1000; ++i) { ApplicationTools::displayGauge(i, 999, '='); size_t r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(47); size_t x = walker.getSequencePosition(r); if (walker.getSequencePosition(pos) != truth) { cout << endl; cerr << r << "\t" << x << endl; cerr << walker.getSequencePosition(pos) << "<>" << truth << endl; return false; } } cout << endl; return true; } bool testAln(SequenceWalker& walker, unsigned int pos, unsigned int truth) { cout << 0 << endl; cout << walker.getAlignmentPosition(0) << endl; cout << 26 << endl; cout << walker.getAlignmentPosition(26) << endl; for (unsigned int i = 0; i < 1000; ++i) { ApplicationTools::displayGauge(i, 999, '='); unsigned int r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(27); walker.getAlignmentPosition(r); if (walker.getAlignmentPosition(pos) != truth) { cout << endl; cerr << walker.getSequencePosition(pos) << "<>" << truth << endl; return false; } } cout << endl; return true; } int main() { RNA* alpha = new RNA(); BasicSequence seq1("seq1", "----AUGCCG---GCGU----UUU----G--G-CCGACGUGUUUU--", alpha); SequenceWalker walker(seq1); for (unsigned int i = 0; i < 27; ++i) { size_t j = walker.getAlignmentPosition(i); cout << i << "\t" << seq1.getChar(j) << "\t" << j << endl; } cout << endl; if (!testAln(walker, 5, 9)) return 1; if (!testAln(walker, 10, 21)) return 1; if (!testAln(walker, 22, 40)) return 1; cout << "_________________________________________________" << endl; for (unsigned int i = 0; i < seq1.size(); ++i) { cout << i << "\t" << seq1.getChar(i) << "\t" << walker.getSequencePosition(i) << endl; } cout << endl; if (!testSeq(walker, 9, 5)) return 1; if 
(!testSeq(walker, 21, 10)) return 1; if (!testSeq(walker, 40, 22)) return 1; return 0; } bpp-seq-2.1.0/test/test_sequences.cpp000644 000000 000000 00000006753 12147656566 017611 0ustar00rootroot000000 000000 // // File: test_sequences.cpp // Created by: Julien Dutheil // Created on: Mon Dec 130 17:10 2010 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include #include #include #include using namespace bpp; using namespace std; int main() { BasicSequence seq1("test DNA", "ATTTCG---TCGTT-AAAGCWCATGCATCGATC", &AlphabetTools::DNA_ALPHABET); BasicSequence motif1("motif", "ATTT", &AlphabetTools::DNA_ALPHABET); BasicSequence motif2("motif", "TCG", &AlphabetTools::DNA_ALPHABET); BasicSequence motif3("motif", "GATC", &AlphabetTools::DNA_ALPHABET); BasicSequence motif4("motif", "CGTC", &AlphabetTools::DNA_ALPHABET); BasicSequence motif5("motif", "CWCAT", &AlphabetTools::DNA_ALPHABET); BasicSequence motif6("motif", "CTCAT", &AlphabetTools::DNA_ALPHABET); BasicSequence motif7("motif", "AAGCA", &AlphabetTools::DNA_ALPHABET); size_t pos; cout << "--- Strict match ---" << endl; pos = SequenceTools::findFirstOf(seq1, motif1); if (pos != 0) return 1; cout << motif1.toString() << ": " << pos << endl; pos = SequenceTools::findFirstOf(seq1, motif2); if (pos != 3) return 1; cout << motif2.toString() << ": " << pos << endl; pos = SequenceTools::findFirstOf(seq1, motif3); if (pos != 29) return 1; cout << motif3.toString() << ": " << pos << endl; pos = SequenceTools::findFirstOf(seq1, motif4); if (pos != 33) return 1; cout << motif4.toString() << ": " << pos << endl; pos = SequenceTools::findFirstOf(seq1, motif5); if (pos != 19) return 1; cout << motif5.toString() << ": " << pos << endl; cout << "--- Degenerated match ---" << endl; pos = SequenceTools::findFirstOf(seq1, motif6, false); if (pos != 19) return 1; cout << motif6.toString() << ": " << pos << endl; pos = SequenceTools::findFirstOf(seq1, motif7, false); if (pos != 16) return 1; cout << motif7.toString() << ": " << pos << endl; return (0); } bpp-seq-2.1.0/test/test_io.cpp000644 000000 000000 00000007702 12147656566 016220 0ustar00rootroot000000 000000 // // File: test_io.cpp // Created by: Julien Dutheil // Created on: Mon Nov 01 10:16 2010 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for numerical calculus. This file is part of the Bio++ project. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include #include #include #include #include #include using namespace bpp; using namespace std; int main() { //This program reads a protein alignment generated using SimProt //[http://www.uhnresearch.ca/labs/tillier/simprotWEB/] in various file formats ProteicAlphabet* alpha = new ProteicAlphabet; Fasta fasta; const SiteContainer* sites1 = fasta.readAlignment("example.fasta", alpha); //test number of seq cout << "example.fasta contains " << sites1->getNumberOfSequences() << " sequences" << endl; if (sites1->getNumberOfSequences() != 100) { return 1; } Mase mase; const SiteContainer* sites2 = mase.readAlignment("example.mase", alpha); Clustal clustal; const SiteContainer* sites3 = clustal.readAlignment("example.aln", alpha); Phylip phylip(true, false); const SiteContainer* sites4 = phylip.readAlignment("example.ph", alpha); Phylip phylip3(true, true); const SiteContainer* sites5 = phylip3.readAlignment("example.ph3", alpha); cout << sites1->getNumberOfSequences() << "\t" << sites1->getNumberOfSites() << endl; cout << sites2->getNumberOfSequences() << "\t" << sites2->getNumberOfSites() << endl; cout << sites3->getNumberOfSequences() << "\t" << sites3->getNumberOfSites() << endl; cout << sites4->getNumberOfSequences() << "\t" << sites4->getNumberOfSites() << endl; cout << sites5->getNumberOfSequences() << "\t" << sites5->getNumberOfSites() << endl; //Test: bool test = sites1->getNumberOfSequences() == sites2->getNumberOfSequences() && sites1->getNumberOfSequences() == sites3->getNumberOfSequences() && sites1->getNumberOfSequences() == sites4->getNumberOfSequences() && sites1->getNumberOfSequences() == sites5->getNumberOfSequences() && sites1->getNumberOfSites() == sites2->getNumberOfSites() && sites1->getNumberOfSites() == sites3->getNumberOfSites() && sites1->getNumberOfSites() == sites4->getNumberOfSites() && sites1->getNumberOfSites() == sites5->getNumberOfSites(); delete sites1; delete sites2; delete sites3; delete sites4; delete sites5; delete alpha; return 
(test ? 0 : 1); } bpp-seq-2.1.0/INSTALL.txt000644 000000 000000 00000000700 12147656566 014725 0ustar00rootroot000000 000000 This software needs cmake >= 2.6 to build. After installing cmake, run it with the following command: cmake -DCMAKE_INSTALL_PREFIX=[where to install, for instance /usr/local or $HOME/.local] . If available, you can also use ccmake instead of cmake for a more user-friendly interface. Then compile and install the software with make install You may also consider installing and using the software checkinstall for easier system administration. bpp-seq-2.1.0/AUTHORS.txt000644 000000 000000 00000003005 12147656566 014745 0ustar00rootroot000000 000000 Julien Dutheil Sylvain Gaillard Khalid Belkhir Laurent Gueguen Sylvain Glémin Guillaume Deuchst Eric Bazin Vincent Cahais Contributed code to Bio++ was enabled thanks to the following institutions and resources: 2002 - 2006 Laboratoire GPIA - UMR CNRS 5171 Université Montpellier 2 (Eric Bazin, Khalid Belkhir, Guillaume Deuchst, Julien Dutheil, Sylvain Gaillard, Nicolas Galtier, Sylvain Glémin) 2005 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Vincent Ranwez, Céline Scornavacca) 2006 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Khalid Belkhir, Nicolas Galtier, Sylvain Glémin) 2006 - 2007 ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil) 2007 - 2010 Bioinformatics Research Center, University of Aarhus (Julien Dutheil). Funded by European research Area on Plant Genomics (ERA-PG) ARelatives. 
2010 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil) 2010 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Vincent Cahais) 2007 - Genetics and Horticulture UMR INRA 1259 Angers-Nantes INRA Center (Sylvain Gaillard) 2008 - 2009 Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau) 2009 - 2010 Berkeley University (Bastien Boussau) 2010 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau) 2008 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Laurent Guéguen) bpp-seq-2.1.0/debian/copyright000644 000000 000000 00000005641 12147656566 016244 0ustar00rootroot000000 000000 This package was debianized by Julien Dutheil on Tue, 05 Mar 2013 14:34:00 +0100. It was downloaded from Upstream Author: Julien Dutheil Copyright: Copyright (C) 2013 Bio++ Development Team License: This package is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this package; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL'. The Debian packaging is (C) 2013, Julien Dutheil and is licensed under the GPL, see above. The provided software is distributed under the CeCILL license: This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. The complete text of the license may be found here: http://www.cecill.info/licences/Licence_CeCILL_V2-en.html bpp-seq-2.1.0/debian/changelog000644 000000 000000 00000003656 12147656566 016167 0ustar00rootroot000000 000000 libbpp-seq (2.1.0-1) unstable; urgency=low * 'omics' tools now in bpp-seq-omics * Extended BppO support * StateProperties renamed to AlphabetIndex -- Julien Dutheil Tue, 05 Mar 2013 14:31:00 +0100 libbpp-seq (2.0.3-1) unstable; urgency=low * Bugs fixed in maf parser + new iterators * Added support for GFF * Added support for FastQ * Several bugs fixed -- Julien Dutheil Thu, 09 Feb 2012 14:00:00 +0100 libbpp-seq (2.0.2-1) unstable; urgency=low * RFP: Bio++ -- The Bio++ bioinformatics libraries. (Closes: #616373). 
* Packages are now non-native. * Bugs fixed in maf parser. -- Julien Dutheil Thu, 09 Jun 2011 11:00:00 +0100 libbpp-seq (2.0.1) unstable; urgency=low * Fixed copyright and rules file. -- Julien Dutheil Mon, 28 Feb 2011 09:00:00 +0100 libbpp-seq (2.0.0) unstable; urgency=low * New source files organization. * More NGS tools: sequence annotations, quality scores, file parsers, etc. -- Julien Dutheil Mon, 07 Feb 2011 09:00:00 +0100 libbpp-seq (1.7.0) unstable; urgency=low * Several bug fixed. Compilation with -Weffc++. * New compressed sites container. * Improved security by changing several pointers to references. -- Julien Dutheil Thu, 25 Mar 2010 10:02:50 +0100 libbpp-seq (1.6.0) unstable; urgency=low * New support for Nexus sequence files. * Update for version 1.8 of Bio++. -- Julien Dutheil Wed, 10 Jun 2009 11:28:58 +0100 libbpp-seq (1.5.0) unstable; urgency=low * Several bugs fixed, new Phred sequence format, speed improvements. -- Julien Dutheil Thu, 11 Dec 2008 12:21:37 +0100 libbpp-seq (1.4.1) unstable; urgency=low * Initial Release. -- Julien Dutheil Mon, 21 Jul 2008 15:17:26 +0200 bpp-seq-2.1.0/debian/prerm000755 000000 000000 00000000616 12147656566 015361 0ustar00rootroot000000 000000 #! /bin/bash # Abort if any command returns an error value set -e removeGeneric() { if [ -f $1.all ] then echo "-- Remove generic include file: $1.all" rm $1.all fi for file in "$1"/* do if [ -d "$file" ] then # Recursion: removeGeneric $file fi done } if [ "$1" = "remove" ]; then # Actualize .all files removeGeneric /usr/include/Bpp fi exit 0 bpp-seq-2.1.0/debian/rules000755 000000 000000 00000005311 12147656566 015363 0ustar00rootroot000000 000000 #!/usr/bin/make -f # -*- makefile -*- # Sample debian/rules that uses debhelper. # This file was originally written by Joey Hess and Craig Small. # As a special exception, when this file is copied by dh-make into a # dh-make output file, you may use that output file without restriction. 
# This special exception was added by Craig Small in version 0.37 of dh-make. # 24/01/10 Modification for use with CMake by Julien Dutheil. # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 # These are used for cross-compiling and for saving the configure script # from having to guess our platform (since we know it already) DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) CFLAGS = -Wall -g ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) CFLAGS += -O0 else CFLAGS += -O2 endif # shared library versions version=`ls src/lib*.so.* | \ awk '{if (match($$0,/[0-9]+\.[0-9]+\.[0-9]+$$/)) print substr($$0,RSTART)}'` major=`ls src/lib*.so.* | \ awk '{if (match($$0,/\.so\.[0-9]+$$/)) print substr($$0,RSTART+4)}'` configure: cmake -DCMAKE_INSTALL_PREFIX=/usr -DBUILD_TESTING=OFF . config.status: configure dh_testdir build: build-stamp build-stamp: config.status dh_testdir # Add here commands to compile the package. $(MAKE) touch $@ clean: dh_testdir dh_testroot # Add here commands to clean up after the build process. [ ! -f Makefile ] || $(MAKE) clean; [ ! -f Makefile ] || rm Makefile; [ ! -f src/Makefile ] || rm src/Makefile; [ ! -f test/Makefile ] || rm test/Makefile; rm -f config.sub config.guess rm -f build-stamp rm -f CMakeCache.txt rm -f *.cmake rm -f src/*.cmake rm -f test/*.cmake rm -rf CMakeFiles rm -rf src/CMakeFiles rm -rf test/CMakeFiles rm -rf _CPack_Packages rm -rf Testing rm -f DartConfiguration.tcl dh_clean install: build dh_testdir dh_testroot dh_prep dh_installdirs # Add here commands to install the package into debian/tmp $(MAKE) DESTDIR=$(CURDIR)/debian/tmp install # Build architecture-independent files here. binary-indep: build install # We have nothing to do by default. # Build architecture-dependent files here. 
binary-arch: build install dh_testdir dh_testroot dh_installchangelogs ChangeLog dh_installdocs dh_installexamples dh_install # dh_installmenu # dh_installdebconf # dh_installlogrotate # dh_installemacsen # dh_installpam # dh_installmime # dh_installinit # dh_installcron # dh_installinfo dh_installman dh_link dh_strip dh_compress dh_fixperms # dh_perl # dh_python dh_makeshlibs dh_installdeb dh_shlibdeps dh_gencontrol dh_md5sums dh_builddeb binary: binary-indep binary-arch .PHONY: build clean binary-indep binary-arch binary install bpp-seq-2.1.0/debian/docs000644 000000 000000 00000000000 12147656566 015144 0ustar00rootroot000000 000000 bpp-seq-2.1.0/debian/postrm000755 000000 000000 00000001524 12147656566 015557 0ustar00rootroot000000 000000 #! /bin/bash # Abort if any command returns an error value set -e createGeneric() { echo "-- Creating generic include file: $1.all" #Make sure we run into subdirectories first: dirs=() for file in "$1"/* do if [ -d "$file" ] then # Recursion: dirs+=( "$file" ) fi done for dir in ${dirs[@]} do createGeneric $dir done #Now list all files, including newly created .all files: if [ -f $1.all ] then rm $1.all fi dir=`basename $1` for file in "$1"/* do if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ) then file=`basename $file` echo "#include \"$dir/$file\"" >> $1.all fi done; } if [ "$1" = "remove" ]; then # Automatically added by dh_makeshlibs ldconfig # Actualize .all files createGeneric /usr/include/Bpp fi exit 0 bpp-seq-2.1.0/debian/compat000644 000000 000000 00000000002 12147656566 015501 0ustar00rootroot000000 000000 5 bpp-seq-2.1.0/debian/libbpp-seq-dev.install000644 000000 000000 00000000110 12147656566 020475 0ustar00rootroot000000 000000 debian/tmp/usr/include/* debian/tmp/usr/lib/*.a debian/tmp/usr/lib/*.so bpp-seq-2.1.0/debian/postinst000755 000000 000000 00000001443 12147656566 016116 0ustar00rootroot000000 000000 #! 
/bin/bash # Abort if any command returns an error value set -e createGeneric() { echo "-- Creating generic include file: $1.all" #Make sure we run into subdirectories first: dirs=() for file in "$1"/* do if [ -d "$file" ] then # Recursion: dirs+=( "$file" ) fi done for dir in ${dirs[@]} do createGeneric $dir done #Now list all files, including newly created .all files: if [ -f $1.all ] then rm $1.all fi dir=`basename $1` for file in "$1"/* do if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ) then file=`basename $file` echo "#include \"$dir/$file\"" >> $1.all fi done; } if [ "$1" = "configure" ]; then # Actualize .all files createGeneric /usr/include/Bpp fi exit 0 bpp-seq-2.1.0/debian/libbpp-seq9.install000644 000000 000000 00000000035 12147656566 020020 0ustar00rootroot000000 000000 debian/tmp/usr/lib/lib*.so.* bpp-seq-2.1.0/debian/control000644 000000 000000 00000001330 12147656566 015703 0ustar00rootroot000000 000000 Source: libbpp-seq Section: libs Priority: optional Maintainer: Loic Dachary Uploaders: Julien Dutheil Build-Depends: debhelper (>= 5), cmake (>= 2.6), libbpp-core-dev (>= 2.1.0) Standards-Version: 3.9.1 Package: libbpp-seq-dev Section: libdevel Architecture: any Depends: libbpp-seq9 (= ${binary:Version}), ${misc:Depends}, libbpp-core-dev (>= 2.1.0) Description: Bio++ Sequence library development files. Contains the Bio++ classes for sequence analysis. Package: libbpp-seq9 Section: libs Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-core2 (>= 2.1.0) Description: Bio++ Sequence library. Contains the Bio++ classes for sequence analysis. bpp-seq-2.1.0/debian/source/format000644 000000 000000 00000000014 12147656566 017011 0ustar00rootroot000000 000000 3.0 (quilt) bpp-seq-2.1.0/ChangeLog000644 000000 000000 00000032650 12147656566 014641 0ustar00rootroot000000 000000 05/03/13 -*- Version 2.1.0 -*- 07/02/13 Julien Dutheil * StateProperties => Alphabet index (dir renamed). 
* New BppO classes for Alphabet Index 1 and 2, and corresponding methods in SequenceApplicationTools. 19/01/13 Julien Dutheil * Compiles with -Wconversion. 09/01/13 Julien Dutheil * Use size_t instead of unsigned int for indexes in containers. 17/09/12 Julien Dutheil * Reorganized IO classes. 11/09/12 Julien Dutheil * Added generic container converter function. 30/07/12 Julien Dutheil * Moved features, fastq and maf to new bpp-seq-omics library. 18/07/12 Julien Dutheil * Bug fixed in merge maf iterator, in case a sequence is missing in one of the two blocks. 25/06/12 Julien Dutheil * Extended maf tools and reorganization of the code. * Extended GFF output. * New [] operator in SequenceFeatureSet. 18/06/12 Julien Dutheil * Fixed bug in Maf parser: scores are now parsed correctly. 08/03/12 Julien Dutheil * Added support for '*' character in proteins (stop codon) 09/02/12 -*- Version 2.0.3 -*- 16/01/12 Julien Dutheil * Bug #42 solved, new support for extended fasta parser in application tools. * Added contribution from Benoit Nabholz and Annabelle Haudry on 4-fold degenerate sites. 06/01/12 Julien Dutheil * Added support for "!" state in nucleic alphabet, which can be either a gap or an unknown character. 24/11/11 Julien Dutheil * Added SequenceWalker class and test. 22/11/11 Sylvain Gaillard * Added support for FastQ 21/11/11 Julien Dutheil * Added support for GFF3. 21/11/11 Sylvain Gaillard * Bug #35 solved. 18/11/11 Julien Dutheil * New MafIterators: SequenceStatistics and PairwiseSequenceStatistics. * Function SequenceTools::getPercentIdentity now allows gaps to be excluded. 15/09/11 Julien Dutheil * Merge function of SequenceWithAnnotation now also merges annotations properly. 17/06/11 Julien Dutheil * Bug #39 solved. 09/06/11 -*- Version 2.0.2 -*- 08/06/11 Julien Dutheil * Fixed several Maf iterators. 28/02/11 -*- Version 2.0.1 -*- * Fixed debian packaging. 
07/02/11 -*- Version 2.0.0 -*- 19/11/10 Julien Dutheil * Solved bug 0000016 (General comments not copied in containers). * A few more code cleaning. 01/11/10 Julien Dutheil * Solved bug 0000005 (Clustal parser). * Added unit testing for sequence input format. * Solved bug 0000004 (Remove gap sites in SiteContainer). 12/10/10 Benoît Nabholz * Added yeast mitochondrial codon alphabet and genetic code. 20/09/10 Julien Dutheil * Reorganization of the code for version 2.0. 06/09/10 Julien Dutheil * Added support for masked sequences in SequenceWithAnnotationTools and MafParser. 05/09/10 Julien Dutheil * First draft of CaseMaskedAlphabet, for dealing with repeat-masked sequences for instance. 21/07/10 Julien Dutheil * Added method hasSequence(name) in SequenceContainer interface. 20/07/10 Julien Dutheil * New Sequence interface design. * Maf parser now reads quality scores if there are some. 19/07/10 Julien Dutheil * New method isInit in CodonAlphabet. * New method getCDS in SequenceTools. 28/04/10 Julien Dutheil * New MAF genome alignment parser. 22/04/10 Sylvain Gaillard * Break SequenceTools interface: - renamed invert -> getInvert - renamed complement -> getComplement now invert and complement methods deals with sequence ref - New method invertComplement 16/04/10 Julien Dutheil * New Stockholm sequence format (output only). * Refactoring of class ISequence2 => IAlignment + new interface OAlignment. 24/03/10 -*- Version 1.7.0 -*- 11/03/10 Julien Dutheil * Nexus format now deals with lower case keywords. 17/12/09 Julien Dutheil * Code cleaning for VectorSiteContainer * New class CompressedVectorSiteContainer using less memory than VectorSiteContainer, but more restricted. 16/11/09 Sylvain Gaillard * No more "using namespace" in header files. 04/11/09 Julien Dutheil * Remove SymbolListTools::randomizeContent to a more efficient SymbolList::shuffle method. 
12/10/09 Julien Dutheil * Now SeqLib compiles with -Weffc++ and -Wshadow without any warning :) 22/09/09 Julien Dutheil * Finally added 'addSequence' method in SequenceContainer. * MapSequenceContainer now has an addSequence method that uses the name of the sequence as a key by default. * Several methods in Sequence/SiteContainerTools are not templated anymore and have been improved. 09/09/09 Sylvain Gaillard * Fixed white space skip in Fasta sequence 20/08/09 Julien Dutheil * Added config files for CMake. 19/08/09 Sylvain Gaillard * Introduced 3 new interfaces IOSequenceStream, ISequenceStream and OSequenceStream. These interfaces are intended to be used by IOFormat dealing with Sequence and not SequenceContainers. They are actually implemented by the Fasta class. 17/08/09 Adam Smith * Bug fixed in DefaultAlphabet: unknown state is 37 not 38. 04/08/09 Sylvain Gaillard * Introduced new classes for Alphabet States 23/07/09 Sylvain Gaillard * Added SequenceTools::getPutativeHaplotypes 22/07/09 Sylvain Gaillard * Added SequenceTools::getNumberOfUnresolvedSites * Updated SequenceTools::getNumberOfSites, SequenceTools::getNumberOfCompleteSites and SequenceTools::removeGaps to be Alphabet implementation independent. 07/07/07 Julien Dutheil * Updated GranthamAAChemicalDistance, with a new signed, non-symmetric, option. 26/06/09 Sylvain Gaillard * Added new SequencePositionIterators classes. 23/06/09 Julien Dutheil * Update of the Container classes: they now return references and not pointers. Several code updates, including copy constructors. 17/06/09 Sylvain Gaillard (Glasgow Workshop) * Fix DNA/RNA/ProteicAlphabet::getAlias: works now with lower case states. * Fix SiteTools::isConstant test gap or unknown * Turning SequenceContainerTools::getFrequencies into void function. 21/07/08 -*- Version 1.6.0 -*- 28/05/09 Julien Dutheil * Added basic support for the Nexus format. 22/05/09 Julien Dutheil * Improved Clustal format (solves bug found by Emmanuel M.) 
* SequenceApplicationTools uses the keyval syntax for format description. 27/04/09 Julien Dutheil * Added site randomization method. 28/02/09 Julien Dutheil * Added mutual information calculation. * Added pairwise counting methods. * Frequencies counting methods have been improved (avoid useless copy of maps). 16/01/09 Julien Dutheil * max_gap_allowed option in SequenceApplicationTools is now an inclusive bound: 0 or 0% will keep all complete sites, and 'n=number of sequences' or 100% will keep all sites. 13/01/09 Sylvain Gaillard * Little optimization in Fasta::write() method. 12/01/09 Sylvain Gaillard * Added support for comments (HUPO-PSI like) in Fasta. 08/01/09 Julien Dutheil * SequenceTools::reverse method has been deprecated in favor of the SequenceTools::inverse method, of which it is now an alias. 07/01/09 Julien Dutheil * Code improvement in Vector containers. 21/12/08 Julien Dutheil * Return type for clone() methods corrected (Sequence, Site, SiteContainer and derived classes). * Bug fixed in SequenceApplicationTools::get[Sequence/Site]Container, phylip format. 21/07/08 -*- Version 1.5.0 -*- 05/12/08 Julien Dutheil * VectorSiteContainer::getSite() faster (more than 10 times faster on a ~500-sequence data set!) 04/12/08 Julien Dutheil * Fixed missing redefinition of translate and reverse methods in class DNAToRNA. * Made inheritance virtual. 01/12/08 Julien Dutheil * New methods getSupportedInts and getSupportedChars in Alphabet classes. 18/11/08 Julien Dutheil * biopp-help-forum 91 (by Stephen Smith): Fasta, Mase, Clustal and Phylip now have an option to turn off the name checking and decrease loading time. * biopp-help-forum 93 (by Stephen Smith): Phylip format now supports any delimiter when reading/writing files in extended format. 14/11/08 Sylvain Gaillard * New method SequenceTools::subtractHaplotype(Sequence, Sequence, string, unsigned int). 
07/11/08 Julien Dutheil * New constructor in VectorSiteContainer now has a tag to disable position checking, which can turn out to be quite slow. 06/11/08 Sylvain Gaillard * New class PhredPhd to read sequence from phd files produced by the phred program * Removed an unused method in PhredPoly 05/11/08 Sylvain Gaillard * New methods Alphabet::getGeneric(states) implemented in AbstractAlphabet, DefaultAlphabet, DNA, RNA and ProteicAlphabet. * New alphabet exception CharStateNotSupportedException. * New class PhredPoly to read sequence from poly files produced by the phred program. * Fixed heterozygous site read in PhredPoly. 09/10/08 Julien Dutheil * New method SequenceTools::invert(Sequence). 24/09/08 Bastien Boussau & Julien Dutheil * Memory leak fixed in Fasta and Mase readers. 21/07/08 -*- Version 1.4.1 -*- 26/06/08 Julien Dutheil * Bug fixed in Clustal format. 20/04/08 Julien Dutheil * New interface ISequence2 for reading alignments. 18/03/08 Julien Dutheil and Bastien Boussau * Bowker's test for homogeneity implemented in SequenceTools. 08/02/08 Julien Dutheil * Bug fixed in VectorSequenceContainer: method addSequence is now virtual! (thanks to Laurent Gueguen for pointing out this bug)! 07/02/08 Julien Dutheil * Added functions isDNAAlphabet and isRNAAlphabet in AlphabetTools. * Bug fixed in getAlphabetFromSequence method. * Method getContent in Sequence and Site now outputs a reference. 18/01/08 -*- Version 1.4.0 -*- 09/01/08 Julien Dutheil * All classes included in the new 'bpp' namespace. 06/11/07 Julien Dutheil * New function SequenceContainerTools::merge. 02/10/07 Julien Dutheil * New class GenBank, enabling GenBank sequence format support for input. * Added new copy constructors from SequenceContainer in Vector[Site/Sequence]Container. * New methods changeUnresolvedCharacterToGaps and changeGapsToUnknownCharacters in SymbolListTools. * Bug fixed in changeUnresolvedCharacterToGaps in SiteContainerTools. 
06/07/07 -*- Version 1.3.0 -*- 12/06/07 Julien Dutheil * More tools in class alphabet (isGap, isUnresolved, etc.) * New class DistanceMatrix, previously in phyllib. * New similarity measures in SiteContainerTools. 04/06/07 Julien Dutheil * New constructor from sequence names in VectorSiteContainer. 01/06/07 Julien Dutheil * New method SiteContainerTools::boostrapSites to bootstrap an alignment. 21/05/07 Julien Dutheil * New method SequenceContainerTools::getSequenceContainer to read non-aligned sequences. 04/05/07 Julien Dutheil * Bug fixed in global alignment with gap opening and extending penalties. * New generic score scheme. * Compatibility update (NumCalc) 24/04/07 Benoît Nabholz & Julien Dutheil * GeneticCode classes can now translate unknown codons to unknown amino acids. 02/04/07 Julien Dutheil * VIRTUAL_COV variable changed to NO_VIRTUAL_COV. configure.ac file updated. => Default behaviour is now /really/ to use covariant return type with virtual inheritance, even when importing the files in an IDE. To use the old compilers' behaviour, one must set the NO_VIRTUAL_COV preproc variable. * this modification also solves a problem with the old configure.ac which was not correctly updated in version 1.2.0 :( * Bug fixed in SymbolListTools::getCounts. 02/04/07 Julien Dutheil * New method getAlphabetIndex1() in class SimpleIndexDistance. 24/02/07 Julien Dutheil * New pairwise alignment algorithm: gap opening and extending penalties. 22/02/07 Julien Dutheil * GeneticCode derived classes now properly redefine the translate(Sequence&) method. 19/01/07 -*- Version 1.2.0 -*- 19/01/07 Julien Dutheil * Functions AlphabetTools::getType() and StringSequenceTools::getAlphabetFromSequence() rewritten. * New DefaultNucleotideScore matrix. * New AAIndex1Entry and AAIndex2Entry classes which create AlphabetIndex1 and AlphabetIndex2 objects from AAIndex1 and AAIndex2 entries, respectively. 
18/12/06 Julien Dutheil * New alignment tools in SiteContainerTools, including the Needleman and Wunsch algorithm. * BLOSUM50 matrix available. * Bug fixed in Sequence.h: method setContent is now properly redefined. * Bug fixed in ProteicAlphabet->getAbbr(int). The returned result is now correct!!! 06/12/06 Julien Dutheil * DefaultAlphabet now accepts "." characters. * New AlphabetTools::isDefaultAlphabet(const Alphabet *) method. * New SiteContainerTools::resolveDottedAlignment(const Sitecontainer &, const Alphabet *) method. 30/08/06 Julien Dutheil * Bug fixed in phylip sequential reader, + duplicated sequence in container now throws sequence name. (included in 1.1.1 release). 28/08/06 -*- Version 1.1.1 -*- 08/08/06 Julien Dutheil * Bug fixed in Phylip sequential read method. 15/06/06 Julien Dutheil * Method changeGapsToUnknownCharacters() moved from SequenceContainerTools to SiteContainerTools, and is much faster! * New removeGapOnlySites() method in SiteContainerTools. * New method SiteTools::isGapOnly(). * new 'sequence.max_gap_allowed' option in SequenceApplicationTools::getSitesToAnalyse(). 13/06/06 Julien Dutheil * Phylip sequential format improved, no more empty line between sequences needed. 12/06/06 Julien Dutheil * SiteTools::isConstant() method now has an 'ignoreUnknown' option. 01/01/06 Julien Dutheil * VectorSiteContainer and AlignedSequenceContainer now number sites from 1 (and not 0). 17/05/06 Benoît Nabholz, Sylvain Glémin & Julien Dutheil * Bug fixed in SiteContainerTools::getConsensus(), + SymbolListTools::getFrequencies() and getCounts() improved. 15/05/06 Julien Dutheil * Bug fixed: Added getUnknownCharacterCode() method in DefaultAlphabet. 09/05/06 Julien Dutheil * New AAChargeIndex class. * Bug fixed in SiteTools::hasUnknown(Site). 18/04/06 -*- Version 1.1.0 -*- 18/04/06 Julien Dutheil * New IOSequenceFactory class. 06/04/06 Julien Dutheil * New () function operators defined on SequenceContainers. 
bpp-seq-2.1.0/Doxyfile000644 000000 000000 00000240074 12147656566 014576 0ustar00rootroot000000 000000 # Doxyfile 1.8.3.1-20130209 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or sequence of words) that should # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. PROJECT_NAME = bpp-seq # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 2.1.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer # a quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is # included in the documentation. The maximum height of the logo should not # exceed 55 pixels and the maximum width should not exceed 200 pixels. 
# Doxygen will copy the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. 
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = YES # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. 
Note that you specify absolute paths here, but also # relative paths, which will be relative from the directory where doxygen is # started. STRIP_FROM_PATH = ./src/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = ./src/ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. 
MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 2 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding # "class=itcl::class" will allow you to use the command class in the # itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. 
Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, # and language is one of the parsers supported by doxygen: IDL, Java, # Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, # C++. For instance to make doxygen treat .inc files as Fortran files (default # is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. EXTENSION_MAPPING = # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you # can mix doxygen, HTML, and XML commands with Markdown formatting. # Disable only in case of backward compatibility issues. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented classes, # or namespaces to their corresponding documentation. Such a link can be # prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.)
but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES (the # default) will make doxygen replace the get and set methods by a property in # the documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. 
SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and # unions are shown inside the group in which they are included (e.g. using # @ingroup) instead of on a separate page (for HTML and Man pages) or # section (for LaTeX and RTF). INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and # unions with only public data fields or simple typedef fields will be shown # inline in the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO (the default), structs, classes, and unions are shown on a separate # page (for HTML and Man pages) or section (for LaTeX and RTF). INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. 
Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. SYMBOL_CACHE_SIZE = 0 # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the # same symbol appear multiple times in the code, doxygen keeps a cache of # pre-resolved symbols. If the cache is too small doxygen will become slower. # If the cache is too large, memory is wasted. The cache size is given by this # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = YES # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. 
# If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. 
If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. 
If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to # do proper type resolution of all parameters of a function it will reject a # match between the prototype and the implementation of a member function even # if there is only one candidate or it is obvious which candidate to choose # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. 
GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if section-label ... \endif # and \cond section-label ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or macro consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and macros in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). 
Doxygen will invoke the program by executing (via # popen()) the command PROGRAM INPUT-FILE, where PROGRAM is the value of # the FILE_VERSION_FILTER tag, and INPUT-FILE is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files # containing the references data. This must be a list of .bib files. The # .bib extension is automatically appended if omitted. Using this command # requires the bibtex tool to be installed. See also # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this # feature you need bibtex and perl available in the search path. Do not use # file names with spaces, bibtex cannot handle them. CITE_BIB_FILES = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members.
If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = src # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. 
Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl FILE_PATTERNS = *.h \ *.cpp # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring.
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command FILTER INPUT-FILE, where FILTER # is the value of the INPUT_FILTER tag, and INPUT-FILE is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty or if # none of the patterns match the file name, INPUT_FILTER is applied.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERNS (if any) # and it is also possible to disable source filtering for a specific pattern # using *.ext= (so without naming a filter). This option only has effect when # FILTER_SOURCE_FILES is enabled. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page (index.html). # This can be useful if you have a project on for instance GitHub and want to reuse # the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C, C++ and Fortran comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed.
REFERENCED_BY_RELATION = YES # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. 
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. Note that when using a custom header you are responsible # for the proper inclusion of any scripts and style sheets that doxygen # needs, which is dependent on the configuration options used. # It is advised to generate a default header using "doxygen -w html # header.html footer.html stylesheet.css YourConfigFile" and then modify # that header. Note that the header is subject to change so you typically # have to redo this when upgrading to a newer version of doxygen or when # changing the value of configuration settings such as GENERATE_TREEVIEW! HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. 
It can be used to # fine-tune the look of the HTML output. If left blank doxygen will # generate a default style sheet. Note that it is recommended to use # HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this # tag will in the future become obsolete. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional # user-defined cascading style sheet that is included after the standard # style sheets created by doxygen. Using this option one can overrule # certain style aspects. This is preferred over using HTML_STYLESHEET # since it does not replace the standard style sheet and is therefore more # robust against future updates. Doxygen will copy the style sheet file to # the output directory. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors.
HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. HTML_DYNAMIC_SECTIONS = YES # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of # entries shown in the various tree structured indices initially; the user # can expand and collapse entries dynamically later on. Doxygen will expand # the tree to such a level that at most the specified number of entries are # visible (unless a fully collapsed tree already exceeds this amount). # So setting the number of entries 1 will produce a full collapsed tree by # default. 0 is a special value representing an infinite number of entries # and will result in a full expanded tree by default. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. 
Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = YES # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Bio++ Sequence Library" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = bpp.seq # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely # identify the documentation publisher. This should be a reverse domain-name # style string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe).
If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # Qt Help Project / Custom Filters: # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. For more information please see # Qt Help Project / Filter Attributes: # http://doc.trolltech.com/qthelpproject.html#filter-attributes QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files need to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) # at top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. Since the tabs have the same information as the # navigation tree you can set this option to YES if you already set # GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information.
# If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. # Since the tree basically has the same information as the tab index you # could consider setting DISABLE_INDEX to YES when enabling this option. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values # (range [0,1..20]) that doxygen will group on one line in the generated HTML # documentation. Note that a value of 0 will completely suppress the enum # values from appearing in the overview section. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect.
FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want formulas to look prettier in the HTML # output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and # SVG. The default value is HTML-CSS, which is slower, but has the best # compatibility. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to # the MathJax Content Delivery Network so you can quickly see the result without # installing MathJax. However, it is strongly recommended to install a local # copy of MathJax from http://www.mathjax.org before deployment. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension # names that should be enabled during MathJax rendering. MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled.
For large projects the javascript based search engine # can be slow; in that case enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using Javascript. # There are two flavours of web server based search depending on the # EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for # searching and an index file used by the script. When EXTERNAL_SEARCH is # enabled the indexing and searching need to be provided by external tools. # See the manual for details. SERVER_BASED_SEARCH = NO # When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file # which needs to be processed by an external indexer. Doxygen will invoke an # external search engine pointed to by the SEARCHENGINE_URL option to obtain # the search results. Doxygen ships with an example indexer (doxyindexer) and # search engine (doxysearch.cgi) which are based on the open source search engine # library Xapian. See the manual for configuration details. EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will return the search results when EXTERNAL_SEARCH is enabled. # Doxygen ships with an example search engine (doxysearch) which is based on # the open source search engine library Xapian. See the manual for configuration # details. SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the # SEARCHDATA_FILE tag the name of this file can be specified. SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project.
This is # useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple # projects and redirect the results back to the right project. EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are # all added to the same external search index. Each project needs to have a # unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id # to a relative location where the documentation can be found. # The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general.
COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = amsmath # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for # the generated latex document. The footer should contain everything after # the last chapter. If it is left blank doxygen will generate a # standard footer. Notice: only use this tag if you know what you are doing! LATEX_FOOTER = # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images # or other source files which should be copied to the LaTeX output directory. # Note that the files will be copied as-is; there are no commands or markers # available. LATEX_EXTRA_FILES = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = NO # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help.
# This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See # http://en.wikipedia.org/wiki/BibTeX for more info. LATEX_BIB_STYLE = plain #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load style sheet definitions from file. 
Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. 
XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- # If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files # that can be used to generate PDF. GENERATE_DOCBOOK = NO # The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be put in # front of it. If left blank docbook will be used as the default path. DOCBOOK_OUTPUT = docbook #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. 
GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. 
Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the include files # pointed to by INCLUDE_PATH will be searched when a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code.
EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros # that are alone on a line, have an all uppercase name, and do not end with a # semicolon, because these will confuse the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. For each # tag file the location of the external documentation should be added. The # format of a tag file without this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths # or URLs. Note that each tag file must have a unique name (where the name does # NOT include the path). If a tag file is not located in the directory in which # doxygen is run, you must also specify the path to the tagfile here. TAGFILES = ../bpp-core/BppCore.tag=../../bpp-core/html/ # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = BppSeq.tag # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # If the EXTERNAL_PAGES tag is set to YES all external pages will be listed # in the related pages index. If set to NO, only the current project's # pages will be listed. 
EXTERNAL_PAGES = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default). HAVE_DOT = YES # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed.
DOT_NUM_THREADS = 0 # By default doxygen will use the Helvetica font for all dot files that # doxygen generates. When you want a differently looking font you can specify # the font name using DOT_FONTNAME. You need to make sure dot is able to find # the font, which can be done by putting it in a standard location or by setting # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the # directory containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the Helvetica font. # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to # set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If the UML_LOOK tag is enabled, the fields and methods are shown inside # the class node. If there are many fields or methods and many nodes the # graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS # threshold limits the number of items for each type to make the size more # managable. 
Set this to 0 for no limit. Note that the threshold may be # exceeded by 50% before the limit is enforced. UML_LIMIT_NUM_FIELDS = 10 # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. 
The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are svg, png, jpg, or gif. # If left blank png will be used. If you choose svg you need to set # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. # Note that this requires a modern browser other than Internet Explorer. # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible. Older versions of IE do not have SVG support. INTERACTIVE_SVG = NO # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the # \mscfile command). MSCFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. 
DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = YES # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. 
DOT_CLEANUP = YES bpp-seq-2.1.0/src/CMakeLists.txt000644 000000 000000 00000021312 12147656566 016407 0ustar00rootroot000000 000000 # CMake script for Bio++ Sequence Library # Author: Sylvain Gaillard and Julien Dutheil # Created: 20/08/2009 # File list SET(CPP_FILES Bpp/Seq/DNAToRNA.cpp Bpp/Seq/NucleicAcidsReplication.cpp Bpp/Seq/Sequence.cpp Bpp/Seq/SequenceWithAnnotation.cpp Bpp/Seq/SequenceWithAnnotationTools.cpp Bpp/Seq/SequenceWithQuality.cpp Bpp/Seq/SequenceExceptions.cpp Bpp/Seq/SequencePositionIterators.cpp Bpp/Seq/SequenceTools.cpp Bpp/Seq/SequenceWithQualityTools.cpp Bpp/Seq/SequenceWalker.cpp Bpp/Seq/Site.cpp Bpp/Seq/SiteExceptions.cpp Bpp/Seq/SiteTools.cpp Bpp/Seq/StringSequenceTools.cpp Bpp/Seq/SymbolList.cpp Bpp/Seq/SymbolListTools.cpp Bpp/Seq/Transliterator.cpp Bpp/Seq/DistanceMatrix.cpp Bpp/Seq/CodonSiteTools.cpp Bpp/Seq/Alphabet/AbstractAlphabet.cpp Bpp/Seq/Alphabet/AlphabetExceptions.cpp Bpp/Seq/Alphabet/AlphabetTools.cpp Bpp/Seq/Alphabet/CodonAlphabet.cpp Bpp/Seq/Alphabet/DefaultAlphabet.cpp Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp Bpp/Seq/Alphabet/DNA.cpp Bpp/Seq/Alphabet/ProteicAlphabet.cpp Bpp/Seq/Alphabet/RNA.cpp Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp Bpp/Seq/Alphabet/WordAlphabet.cpp Bpp/Seq/Alphabet/RNY.cpp Bpp/Seq/Alphabet/BinaryAlphabet.cpp Bpp/Seq/GeneticCode/GeneticCode.cpp Bpp/Seq/GeneticCode/StandardGeneticCode.cpp Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp Bpp/Seq/AlphabetIndex/BLOSUM50.cpp 
Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp Bpp/Seq/AlphabetIndex/SimpleScore.cpp Bpp/Seq/AlphabetIndex/__GranthamMatrixCode Bpp/Seq/AlphabetIndex/__MiyataMatrixCode Bpp/Seq/AlphabetIndex/__BLOSUM50MatrixCode Bpp/Seq/Container/AbstractSequenceContainer.cpp Bpp/Seq/Container/AlignedSequenceContainer.cpp Bpp/Seq/Container/MapSequenceContainer.cpp Bpp/Seq/Container/SequenceContainerTools.cpp Bpp/Seq/Container/SiteContainerExceptions.cpp Bpp/Seq/Container/SiteContainerTools.cpp Bpp/Seq/Container/VectorSequenceContainer.cpp Bpp/Seq/Container/VectorSiteContainer.cpp Bpp/Seq/Container/CompressedVectorSiteContainer.cpp Bpp/Seq/Container/SiteContainerIterator.cpp Bpp/Seq/Container/SequenceContainerIterator.cpp Bpp/Seq/Io/Clustal.cpp Bpp/Seq/Io/Dcse.cpp Bpp/Seq/Io/Fasta.cpp Bpp/Seq/Io/Mase.cpp Bpp/Seq/Io/MaseTools.cpp Bpp/Seq/Io/Phylip.cpp Bpp/Seq/Io/IoSequenceFactory.cpp Bpp/Seq/Io/GenBank.cpp Bpp/Seq/Io/PhredPhd.cpp Bpp/Seq/Io/PhredPoly.cpp Bpp/Seq/Io/NexusIoSequence.cpp Bpp/Seq/Io/NexusTools.cpp Bpp/Seq/Io/Stockholm.cpp Bpp/Seq/Io/StreamSequenceIterator.cpp Bpp/Seq/Io/BppOSequenceReaderFormat.cpp Bpp/Seq/Io/BppOSequenceWriterFormat.cpp Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp Bpp/Seq/Io/BppOSequenceStreamReaderFormat.cpp Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp Bpp/Seq/App/SequenceApplicationTools.cpp ) SET(H_FILES Bpp/Seq/CodonSiteTools.h Bpp/Seq/DNAToRNA.h Bpp/Seq/NucleicAcidsReplication.h Bpp/Seq/SequenceExceptions.h Bpp/Seq/SequencePositionIterators.h Bpp/Seq/Sequence.h Bpp/Seq/SequenceIterator.h Bpp/Seq/SequenceWithAnnotation.h Bpp/Seq/SequenceWithAnnotationTools.h Bpp/Seq/SequenceWithQuality.h Bpp/Seq/SequenceTools.h Bpp/Seq/SequenceWithQualityTools.h Bpp/Seq/SequenceWalker.h Bpp/Seq/SiteExceptions.h Bpp/Seq/Site.h Bpp/Seq/SiteIterator.h Bpp/Seq/SiteTools.h Bpp/Seq/StringSequenceTools.h 
Bpp/Seq/SymbolList.h Bpp/Seq/SymbolListTools.h Bpp/Seq/Transliterator.h Bpp/Seq/DistanceMatrix.h Bpp/Seq/Alphabet/AbstractAlphabet.h Bpp/Seq/Alphabet/AlphabetExceptions.h Bpp/Seq/Alphabet/Alphabet.h Bpp/Seq/Alphabet/AlphabetState.h Bpp/Seq/Alphabet/AlphabetTools.h Bpp/Seq/Alphabet/CodonAlphabet.h Bpp/Seq/Alphabet/CaseMaskedAlphabet.h Bpp/Seq/Alphabet/DefaultAlphabet.h Bpp/Seq/Alphabet/DNA.h Bpp/Seq/Alphabet/LetterAlphabet.h Bpp/Seq/Alphabet/NucleicAlphabet.h Bpp/Seq/Alphabet/NucleicAlphabetState.h Bpp/Seq/Alphabet/ProteicAlphabet.h Bpp/Seq/Alphabet/ProteicAlphabetState.h Bpp/Seq/Alphabet/RNA.h Bpp/Seq/Alphabet/StandardCodonAlphabet.h Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h Bpp/Seq/Alphabet/WordAlphabet.h Bpp/Seq/Alphabet/RNY.h Bpp/Seq/Alphabet/BinaryAlphabet.h Bpp/Seq/GeneticCode/GeneticCode.h Bpp/Seq/GeneticCode/StandardGeneticCode.h Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h Bpp/Seq/Io/AbstractISequence.h Bpp/Seq/Io/AbstractIAlignment.h Bpp/Seq/Io/AbstractOSequence.h Bpp/Seq/Io/AbstractOAlignment.h Bpp/Seq/Io/Clustal.h Bpp/Seq/Io/Dcse.h Bpp/Seq/Io/Fasta.h Bpp/Seq/Io/IOSequence.h Bpp/Seq/Io/ISequence.h Bpp/Seq/Io/ISequenceStream.h Bpp/Seq/Io/IoSequenceStream.h Bpp/Seq/Io/Mase.h Bpp/Seq/Io/MaseTools.h Bpp/Seq/Io/OSequence.h Bpp/Seq/Io/OSequenceStream.h Bpp/Seq/Io/Phylip.h Bpp/Seq/Io/GenBank.h Bpp/Seq/Io/PhredPhd.h Bpp/Seq/Io/PhredPoly.h Bpp/Seq/Io/NexusIOSequence.h Bpp/Seq/Io/NexusTools.h Bpp/Seq/Io/Stockholm.h Bpp/Seq/Io/IoSequenceFactory.h Bpp/Seq/Io/SequenceFileIndex.h Bpp/Seq/Io/StreamSequenceIterator.h Bpp/Seq/Io/BppOSequenceReaderFormat.h Bpp/Seq/Io/BppOSequenceWriterFormat.h 
Bpp/Seq/Io/BppOAlignmentReaderFormat.h Bpp/Seq/Io/BppOAlignmentWriterFormat.h Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h Bpp/Seq/Io/BppOAlphabetIndex1Format.h Bpp/Seq/Io/BppOAlphabetIndex2Format.h Bpp/Seq/Container/VectorSequenceContainer.h Bpp/Seq/Container/VectorSiteContainer.h Bpp/Seq/Container/CompressedVectorSiteContainer.h Bpp/Seq/Container/MapSequenceContainer.h Bpp/Seq/Container/OrderedSequenceContainer.h Bpp/Seq/Container/AbstractSequenceContainer.h Bpp/Seq/Container/AlignedSequenceContainer.h Bpp/Seq/Container/SequenceContainerExceptions.h Bpp/Seq/Container/SequenceContainer.h Bpp/Seq/Container/SequenceContainerTools.h Bpp/Seq/Container/SiteContainerExceptions.h Bpp/Seq/Container/SiteContainer.h Bpp/Seq/Container/SiteContainerTools.h Bpp/Seq/Container/SiteContainerIterator.h Bpp/Seq/AlphabetIndex/AlphabetIndex1.h Bpp/Seq/AlphabetIndex/AlphabetIndex2.h Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h Bpp/Seq/AlphabetIndex/BLOSUM50.h Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h Bpp/Seq/AlphabetIndex/AAIndex1Entry.h Bpp/Seq/AlphabetIndex/AAIndex2Entry.h Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h Bpp/Seq/AlphabetIndex/AAChargeIndex.h Bpp/Seq/AlphabetIndex/SimpleScore.h Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h Bpp/Seq/AlphabetIndex/AAVolumeIndex.h Bpp/Seq/AlphabetIndex/AASurfaceIndex.h Bpp/Seq/AlphabetIndex/AAMassIndex.h Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h Bpp/Seq/AlphabetIndex/AASEASup30Index.h Bpp/Seq/AlphabetIndex/AASEAInf10Index.h Bpp/Seq/AlphabetIndex/AASEA1030Index.h Bpp/Seq/App/SequenceApplicationTools.h ) # Build the static lib ADD_LIBRARY(bppseq-static STATIC ${CPP_FILES}) SET_TARGET_PROPERTIES(bppseq-static PROPERTIES OUTPUT_NAME bpp-seq 
CLEAN_DIRECT_OUTPUT 1
  )
TARGET_LINK_LIBRARIES(bppseq-static ${LIBS})

# Build the shared lib
# It is compiled from the same CPP_FILES list as the static target and uses the
# same output name (bpp-seq). VERSION and SOVERSION are taken from
# BPPSEQ_VERSION and BPPSEQ_VERSION_MAJOR, which are not set in this part of
# the script.
ADD_LIBRARY(bppseq-shared SHARED ${CPP_FILES})
SET_TARGET_PROPERTIES(bppseq-shared
  PROPERTIES OUTPUT_NAME bpp-seq
  CLEAN_DIRECT_OUTPUT 1
  VERSION ${BPPSEQ_VERSION}
  SOVERSION ${BPPSEQ_VERSION_MAJOR}
  )
TARGET_LINK_LIBRARIES(bppseq-shared ${LIBS})

# Install libs
# Both targets go to lib${LIB_SUFFIX}, relative to the install prefix.
INSTALL(TARGETS bppseq-static bppseq-shared DESTINATION lib${LIB_SUFFIX})

# Install headers
# Only files matching *.h below Bpp/ are copied, to include/Bpp.
INSTALL(DIRECTORY Bpp/ DESTINATION include/Bpp FILES_MATCHING PATTERN "*.h")

# Generate generic include files (.all)
# NOTE(review): the script is handed ${CMAKE_PREFIX_PATH}/include/Bpp, while the
# headers above are installed relative to the install prefix. CMAKE_PREFIX_PATH
# is the search path used by the find_* commands, so this presumably only points
# at the installed headers when the user sets both variables to the same
# directory -- confirm whether CMAKE_INSTALL_PREFIX was intended.
INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${CMAKE_SOURCE_DIR}/genIncludes.sh ${CMAKE_PREFIX_PATH}/include/Bpp)")
bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithAnnotationTools.h000644 000000 000000 00000015623 12147656566 022761 0ustar00rootroot000000 000000
//
// File: SequenceWithAnnotationTools.h
// Authors: Julien Dutheil
// Created on: 06 Sep 2010
//

/*
Copyright or © or Copr. Bio++ Development Team, (Sep 06, 2010)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEWITHANNOTATIONTOOLS_H_ #define _SEQUENCEWITHANNOTATIONTOOLS_H_ #include "SequenceTools.h" #include "SequenceWithAnnotation.h" #include namespace bpp { class SequenceMask : public virtual SequenceAnnotation { private: bool removable_; std::vector mask_; public: static const std::string MASK; public: /** * @name Constructors * @{ */ /** * @brief Build a new SequenceMask object * * Build a new SequenceMask object and set the mask to false. * * @param size The size of the sequence. * @param removable Tell if this listener can be removed by the user. */ SequenceMask(size_t size = 0, bool removable = true) : removable_(removable), mask_(size, false) {} /** * @brief Build a new SequenceMask object * * Build a new SequenceMask object and set the mask as a vector of bool. * * @param mask The boolean mask * @param removable Tell if this listener can be removed by the user. 
*/ SequenceMask(const std::vector& mask, bool removable = true) : removable_(removable), mask_(mask) {} /** @} */ /** * @name Destructor * @{ */ virtual ~SequenceMask() {} /** @} */ /** * @name The Clonable interface * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else SequenceMask* #endif clone() const { return new SequenceMask(*this); } /** @} */ public: void init(const Sequence& seq) { mask_.resize(seq.size()); std::fill(mask_.begin(), mask_.end(), false); } const std::string& getType() const { return MASK; } bool isValidWith(const SequenceWithAnnotation& sequence, bool throwException = true) const { if (throwException && mask_.size() != sequence.size()) throw Exception("SequenceMask. The mask size must match the sequence size."); return (mask_.size() == sequence.size()); } bool isRemovable() const { return removable_; } bool isShared() const { return false; } void beforeSequenceChanged(const SymbolListEditionEvent& event) {} void afterSequenceChanged(const SymbolListEditionEvent& event); void beforeSequenceInserted(const SymbolListInsertionEvent& event) {} void afterSequenceInserted(const SymbolListInsertionEvent& event); void beforeSequenceDeleted(const SymbolListDeletionEvent& event) {} void afterSequenceDeleted(const SymbolListDeletionEvent& event); void beforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) {} void afterSequenceSubstituted(const SymbolListSubstitutionEvent& event) {} size_t getSize() const { return mask_.size(); } const bool operator[](size_t i) const { return mask_[i]; } void setMask(const std::vector& mask) { if (mask.size() != mask_.size()) throw DimensionException("SequenceMask::setMask. Trying to replace mask by a vector with different length.", mask.size(), mask_.size()); mask_ = mask; } /** * @return The mask as a vector. */ const std::vector& getMask() const { return mask_; } void setMask(size_t pos, bool mask) { if (pos >= mask_.size()) throw Exception("SequenceMask::setMask. Vector overflow. 
Scores number: " + TextTools::toString(mask_.size()) + ", but trying to insert mask at position " + TextTools::toString(pos) + "."); mask_[pos] = mask; } void setMask(size_t pos, const std::vector& mask) { if (pos + mask.size() > mask_.size()) throw Exception("SequenceMask::setMask. Vector overflow. Scores number: " + TextTools::toString(mask_.size()) + ", but trying to insert " + TextTools::toString(mask.size()) + " scores at position " + TextTools::toString(pos) + "."); std::copy(mask.begin(), mask.end(), mask_.begin() + pos); } bool merge(const SequenceAnnotation& anno) { try { const SequenceMask* mask = & dynamic_cast(anno); VectorTools::append(mask_, mask->getMask()); return true; } catch (std::exception& e) { return false; } } SequenceAnnotation* getPartAnnotation(size_t pos, size_t len) const throw (Exception) { return new SequenceMask(std::vector(mask_.begin() + pos, mask_.begin() + pos + len), removable_); } }; /** * @brief The SequenceWithAnnotationTools static class * * Implement methods to manipulate SequencesWithAnnotation * * @author Julien Dutheil */ class SequenceWithAnnotationTools { public: /** * @brief Parse a sequence with a CaseMaskedAlphabet and creates a new SequenceWithAnnotation object with original alphabet and a mask annotation. * * @param seq The sequence to parse. * @return A new SequenceWithAnnotation object. * @throw AlphabetException if the input sequence does not have a CaseMaskedAlphabet. */ SequenceWithAnnotation* createMaskAnnotation(const Sequence& seq) throw (AlphabetException); }; } #endif // _SEQUENCEWITHANNOTATIONTOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/StringSequenceTools.h000644 000000 000000 00000016751 12147656566 021264 0ustar00rootroot000000 000000 // // File: StringSequenceTools.h // Created by: Julien Dutheil // Created on: Sun Nov 30 11:29:07 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _STRINGSEQUENCETOOLS_H_ #define _STRINGSEQUENCETOOLS_H_ #include "Alphabet/Alphabet.h" #include "Alphabet/AlphabetExceptions.h" #include "SequenceExceptions.h" #include // From the STL: #include #include namespace bpp { /** * @brief Utilitary methods working on raw string objects. * * Sequences may be stored as strings, but this approach is not as * powerful as using true sequence objects. * Consider using the Sequence and SequenceTools classes for more methods. * * Some of the methods implemented here are internally used by the Sequence object. 
* * @see Sequence, Site, SequenceTools, SiteTools */ class StringSequenceTools { public: StringSequenceTools() {}; ~StringSequenceTools() {}; public: /** * @brief Get a subsequence. * * @param sequence The input sequence. * @param begin The begining position (included). * @param end The ending position (included). * @return A string with the subsequence. * @throw Exception If position does not not match the interval [0, length]. */ static std::string subseq(const std::string& sequence, int begin, int end) throw (Exception); /** * @brief Set up the size of a sequence from the right side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param sequence The input sequence. * @param size The new size of the sequence. */ static std::string setToSizeR(const std::string& sequence, int size); /** * @brief Set up the size of a sequence from the left side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param sequence The input sequence. * @param size The new size of the sequence. */ static std::string setToSizeL(const std::string& sequence, int size); /** * @brief Delete all occurence of a character in the sequence. * * @param sequence The sequence to parse. * @param chars The character to remove. * @return The sequence with all specified characters removed. */ static std::string deleteChar(const std::string& sequence, char chars); /** * @brief Delete all occurence of several characters in the sequence. * * @param sequence The sequence to parse. * @param chars The characters to remove. * @return The sequence with all specified characters removed. */ static std::string deleteChar(const std::string& sequence, std::string chars); /** * @brief Reverse the sequence. * * @param sequence The sequence to reverse. * @return The reversed sequence. 
*/ static std::string* reverse(const std::string& sequence); /** * @brief Get the complement of a sequence. * @deprecated Consider working with sequence objects and translators. * * For this method, sequence is supposed to be of type DNA. * * @param sequence The sequence to complement. * @return The complementary sequence. * @see Sequence * @see NucleicAcidsReplication */ static std::string* complement(const std::string& sequence); /** * @brief Calculate the local GC content of a sequence. * * GC contents are calculated using a window of specified size around the given position. * Note : Calculus for last nucleotides (sequence's size - window size) will return * the last possible rate calculated. * * @param sequence The sequence to analyse. * @param pos The position where to compute the GC content. * @param window The size of the window to use. * @return The GC content as a ratio (# of GC / window). * @throw BadIntException If the sequence is not of type DNA or RNA. * @throw Exception Error in calculus (if the sequence contain gaps for instance). */ static double getGCcontent(const std::string& sequence, size_t pos, size_t window) throw (BadIntegerException, Exception); /** * @brief Convert a string sequence to a vector of int. * * This method is used in the sequence constructor. * This deals with the most simple cases: the sequence (string) contains * a succession of all characters. This is indeed the case for simple * alphabets, but may be more complicated if the alphabet is coded with * variable code length. * * @param sequence The sequence to parse. * @param alphabet The alphabet to use to code the sequence. * @return A vector of int codes. * @throw BarCharException If some character does not match the specified alphabet. */ static std::vector codeSequence(const std::string& sequence, const Alphabet* alphabet) throw (BadCharException); /** * @brief Convert a sequence to its string representation. * * @param sequence The sequence object to convert. 
* @param alphabet The alphabet to use to decode the sequence. * @return A string representation of the sequence. * @throw BarIntException If some value does not match the specified alphabet. */ static std::string decodeSequence(const std::vector& sequence, const Alphabet* alphabet) throw (BadIntException); /** * @brief Parse a sequence and try to guess the correct alphabet to use. * * @param sequence The sequence to parse. * @return A pointer toward a new Alphabet object. * @throw EmptySequenceException if the sequence is empty. * @throw SequenceException if their is an ambiguity between several alphabet. * @throw AlphabetException if the sequence does not match any alphabet. */ static Alphabet* getAlphabetFromSequence(const std::string& sequence) throw (EmptySequenceException, SequenceException, AlphabetException); }; } //end of namespace bpp. #endif //_STRINGSEQUENCETOOLS_H bpp-seq-2.1.0/src/Bpp/Seq/SiteExceptions.cpp000644 000000 000000 00000004125 12147656566 020575 0ustar00rootroot000000 000000 // File: SiteExceptions.cpp // Author: Julien Dutheil // Created On: dim mar 7 2004 /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SiteExceptions.h" #include "Site.h" #include using namespace bpp; using namespace std; /***************************************************************************/ SiteException::SiteException(const std::string& text, const Site* s) : Exception(text + (s != 0 ? "(" + TextTools::toString(s->getPosition()) + ")" : string(""))), site_(s) {}; /***************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SequenceExceptions.cpp000644 000000 000000 00000005541 12147656566 021444 0ustar00rootroot000000 000000 // // File: SequenceExceptions.cpp // Created by: Julien Dutheil // Created on: Mon Nov 3 16:35:30 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SequenceExceptions.h" #include "Sequence.h" using namespace bpp; using namespace std; /******************************************************************************/ SequenceException::SequenceException(const std::string& text, const Sequence* seq) : Exception("SequenceException: " + text + (seq != 0 ? 
"(" + seq->getName() + ")" : string(""))), sequence_(seq) {}; /******************************************************************************/ EmptySequenceException::EmptySequenceException(const std::string& text, const Sequence* seq) : SequenceException("EmptySequenceException: " + text, seq) {}; /******************************************************************************/ SequenceWithGapException::SequenceWithGapException(const std::string& text, const Sequence* seq) : SequenceException("SequenceWithGapException: " + text, seq) {}; /******************************************************************************/ SequenceNotAlignedException::SequenceNotAlignedException(const std::string& text, const Sequence* seq) : SequenceException("SequenceNotAlignedException: " + text, seq) {}; /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Transliterator.cpp000644 000000 000000 00000005710 12147656566 020645 0ustar00rootroot000000 000000 // // File: Transliterator.cpp // Created by: Julien Dutheil // Created on: Sun Oct 12 14:25:25 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Transliterator.h" using namespace bpp; Sequence* AbstractTransliterator::translate(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { if (sequence.getAlphabet()->getAlphabetType() != getSourceAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AbstractTransliterator::translate", getSourceAlphabet(), getTargetAlphabet()); Sequence* tSeq = new BasicSequence(sequence.getName(), "", sequence.getComments(), getTargetAlphabet()); int gap = sequence.getAlphabet()->getGapCharacterCode(); for (unsigned int i = 0; i < sequence.size(); ++i) { int state = sequence.getValue(i); if (state == gap) tSeq->addElement(gap); else tSeq->addElement(translate(state)); } return tSeq; } Sequence* AbstractReverseTransliterator::reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { if (sequence.getAlphabet()->getAlphabetType() != getTargetAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AbstractReverseTransliterator::reverse", getSourceAlphabet(), getTargetAlphabet()); Sequence* rSeq = new BasicSequence(sequence.getName(), "", sequence.getComments(), getSourceAlphabet()); for (unsigned int i = 0; i < 
sequence.size(); ++i) { rSeq->addElement(reverse(sequence.getValue(i))); } return rSeq; } bpp-seq-2.1.0/src/Bpp/Seq/NucleicAcidsReplication.cpp000644 000000 000000 00000007743 12147656566 022360 0ustar00rootroot000000 000000 // // File: NucleicAcidsReplication.cpp // Created by: Julien Dutheil // Created on: Fri May 20 14:40 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "NucleicAcidsReplication.h" using namespace bpp; using namespace std; NucleicAcidsReplication::NucleicAcidsReplication(const NucleicAlphabet* nuc1, const NucleicAlphabet* nuc2) : nuc1_(nuc1), nuc2_(nuc2), trans_() { trans_[-1] = -1; trans_[0] = 3; trans_[1] = 2; trans_[2] = 1; trans_[3] = 0; trans_[4] = 9; trans_[5] = 8; trans_[6] = 6; trans_[7] = 7; trans_[8] = 5; trans_[9] = 4; trans_[10] = 13; trans_[11] = 12; trans_[12] = 11; trans_[13] = 10; trans_[14] = 14; } int NucleicAcidsReplication::translate(int state) const throw (BadIntException) { nuc1_->intToChar(state); return trans_[state]; } std::string NucleicAcidsReplication::translate(const std::string& state) const throw (BadCharException) { int i = nuc1_->charToInt(state); return nuc2_->intToChar(trans_[i]); } Sequence* NucleicAcidsReplication::translate(const Sequence& sequence) const throw (AlphabetMismatchException) { if (sequence.getAlphabet()->getAlphabetType() != getSourceAlphabet()->getAlphabetType()) throw AlphabetMismatchException("NucleicAcidsReplication::translate", getSourceAlphabet(), getTargetAlphabet()); BasicSequence* tSeq = new BasicSequence(sequence.getName(), "", sequence.getComments(), getTargetAlphabet()); for (unsigned int i = 0; i < sequence.size(); i++) { tSeq->addElement(translate(sequence.getValue(i))); } //tSeq->setSense(!tSeq->getSense()); return tSeq; } int NucleicAcidsReplication::reverse(int state) const throw (BadIntException) { nuc2_->intToChar(state); return trans_[state]; } std::string NucleicAcidsReplication::reverse(const std::string& state) const throw (BadCharException) { int i = nuc2_->charToInt(state); return nuc1_->intToChar(trans_[i]); } Sequence* NucleicAcidsReplication::reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { if (sequence.getAlphabet()->getAlphabetType() != getTargetAlphabet()->getAlphabetType()) throw AlphabetMismatchException("NucleicAcidsReplication::reverse", getSourceAlphabet(), getTargetAlphabet()); 
BasicSequence* rSeq = new BasicSequence(sequence.getName(), "", sequence.getComments(), getSourceAlphabet()); for (unsigned int i = 0; i < sequence.size(); i++) { rSeq->addElement(reverse(sequence.getValue(i))); } //rSeq->setSense(! rSeq->getSense()); return rSeq; } bpp-seq-2.1.0/src/Bpp/Seq/SequenceIterator.h000644 000000 000000 00000006143 12147656566 020560 0ustar00rootroot000000 000000 // // File: SequenceIterator.h // Created by: Julien Dutheil // Created on: Tue Feb 26 14:27 2013 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _SEQUENCEITERATOR_H_
#define _SEQUENCEITERATOR_H_

#include "Sequence.h"
#include "SequenceWithQuality.h"

namespace bpp
{

/**
 * @brief Generic sequence iterator interface, allowing to loop over sequences.
 *
 * The interface only exposes forward traversal: ask hasMoreSequences(), then
 * fetch with nextSequence().
 *
 * NOTE(review): ownership of the returned pointers and the behavior of
 * nextSequence() once the iteration is exhausted are not specified here --
 * confirm against the implementing classes.
 */
class SequenceIterator
{
  public:
    SequenceIterator() {}
    virtual ~SequenceIterator() {}

  public:
    /** @return A pointer to a sequence (presumably the next one in the iteration). */
    virtual Sequence* nextSequence() = 0;
    /** @return Whether more sequences are available (does not modify the iterator). */
    virtual bool hasMoreSequences() const = 0;
};

/**
 * @brief Generic const sequence iterator interface, allowing to loop over const sequences.
 *
 * Same contract as SequenceIterator, but the sequences are handed out as
 * pointers to const.
 */
class ConstSequenceIterator
{
  public:
    ConstSequenceIterator() {}
    virtual ~ConstSequenceIterator() {}

  public:
    /** @return A pointer to a const sequence (presumably the next one in the iteration). */
    virtual const Sequence* nextSequence() = 0;
    /** @return Whether more sequences are available (does not modify the iterator). */
    virtual bool hasMoreSequences() const = 0;
};

/**
 * @brief Generic sequence iterator interface, allowing to loop over sequences with quality scores.
 *
 * Inherits virtually from SequenceIterator and redeclares nextSequence()
 * with the more specific SequenceWithQuality return type.
 */
class SequenceWithQualityIterator:
  public virtual SequenceIterator
{
  public:
    SequenceWithQualityIterator() {}
    virtual ~SequenceWithQualityIterator() {}

  public:
    /** @return A pointer to a sequence with quality scores. */
    virtual SequenceWithQuality* nextSequence() = 0;
};

/**
 * @brief Generic const sequence iterator interface, allowing to loop over const sequences with quality scores.
 *
 * Inherits virtually from ConstSequenceIterator and redeclares nextSequence()
 * with the more specific const SequenceWithQuality return type.
 */
class ConstSequenceWithQualityIterator:
  public virtual ConstSequenceIterator
{
  public:
    ConstSequenceWithQualityIterator() {}
    virtual ~ConstSequenceWithQualityIterator() {}

  public:
    /** @return A pointer to a const sequence with quality scores. */
    virtual const SequenceWithQuality* nextSequence() = 0;
};

} //end of namespace bpp.

#endif //_SEQUENCEITERATOR_H_

bpp-seq-2.1.0/src/Bpp/Seq/SymbolList.h000644 000000 000000 00000055064 12147656566 017405 0ustar00rootroot000000 000000 
//
// File: SymbolList.h
// Created by: Julien Dutheil
// Created on: Fri Apr 9 2005
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SYMBOLLIST_H_ #define _SYMBOLLIST_H_ #include "Alphabet/Alphabet.h" #include // From the STL: #include #include #include #include namespace bpp { /** * @brief The SymbolList interface. * * @see Alphabet */ class SymbolList: public virtual Clonable { public: /** * @name The Clonable interface * * @{ */ #ifndef NO_VIRTUAL_COV SymbolList* clone() const = 0; #endif /** @} */ // Class destructor virtual ~SymbolList() {} public: /** * @brief Get the alphabet associated to the list. * * @return A const pointer to the alphabet. * @see Alphabet class. 
*/ virtual const Alphabet* getAlphabet() const = 0; /** * @brief Get the number of elements in the list. * * @return The number of sites in the list. */ virtual size_t size() const = 0; /** * @name Acting on the content of the list. * * @{ */ /** * @brief Get the whole content of the list as a vector of int. * * @return A reference to the content of the list. */ virtual const std::vector& getContent() const = 0; /** * @brief Set the whole content of the list. * * @param list The new content of the list. * @see The list constructor for information about the way lists are internaly stored. */ virtual void setContent(const std::vector& list) throw (BadIntException) = 0; /** * @brief Set the whole content of the list. * * @param list The new content of the list. * @see The list constructor for information about the way lists are internaly stored. */ virtual void setContent(const std::vector& list) throw (BadCharException) = 0; /** @} */ /** * @brief Convert the list as a string. * * This method is useful for dumping a list to a file or to the screen for display. * * @return The whole list as a string. */ virtual std::string toString() const = 0; /** * @name Edition methods. * * @{ */ /** * @brief Add a character to the end of the list. * * @param c The character to add, given as a string. */ virtual void addElement(const std::string& c) throw (BadCharException) = 0; /** * @brief Add a character at a certain position in the list. * * @param pos The postion where to insert the element. * @param c The character to add, given as a string. */ virtual void addElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException) = 0; /** * @brief Set the element at position 'pos' to character 'c'. * * @param pos The position of the character to set. * @param c The value of the element, given as a string. 
*/ virtual void setElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException) = 0; /** * @brief Delete the element at position 'pos'. * * @param pos The position of the element to delete. */ virtual void deleteElement(size_t pos) throw (IndexOutOfBoundsException) = 0; /** * @brief Delete the elements at position 'pos'. * * @param pos The position of the first element to delete. * @param len The length of the region to delete. */ virtual void deleteElements(size_t pos, size_t len) throw (IndexOutOfBoundsException) = 0; /** * @brief Get the element at position 'pos' as a character. * * @param pos The position of the character to retrieve. */ virtual std::string getChar(size_t pos) const throw (IndexOutOfBoundsException) = 0; /** * @brief Add a character to the end of the list. * * @param v The character to add, given as an int. */ virtual void addElement(int v) throw (BadIntException) = 0; /** * @brief Add a character at a certain position in the list. * * @param pos The postion where to insert the element. * @param v The character to add, given as an int. */ virtual void addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) = 0; /** * @brief Set the element at position 'pos' to character 'v'. * * @param pos The position of the character to set. * @param v The value of the element, given as an int. */ virtual void setElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) = 0; /** * @brief Get the element at position 'pos' as an int. * * @param pos The position of the character to retrieve. */ virtual int getValue(size_t pos) const throw (IndexOutOfBoundsException) = 0; /** @} */ /** * @name Provide direct access to the list content. * * @warning These operators allow you to modifiy the list content. * No alphabet checking is performed for your modifications, so use with care, or * consider using the setContent() method. 
* * @{ */ /** * @brief Operator [] overloaded for quick access to a character in list. * * @param i The position to retrieve. * @return The integer value of character at position i. */ virtual const int& operator[](size_t i) const = 0; /** * @brief Operator [] overloaded for quick access to a character in list. * * @param i The position to retrieve. * @return The integer value of character at position i. */ virtual int& operator[](size_t i) = 0; /** * @brief Randomly shuffle the content of the list, with linear complexity. */ virtual void shuffle() = 0; /** @} */ }; /** * @brief A basic SymbolList object. * * This is a general purpose container, containing an ordered list of states(= letters). * The states that allowed to be present in the list are defined by an alphabet object, * which is passed to the list constructor by a pointer. * * For programming convenience, the states are stored as integers, but the translation toward * and from a char description is easily performed with the Alphabet classes. * * @see Alphabet */ class BasicSymbolList: public virtual SymbolList { private: /** * @brief The Alphabet attribute must be initialized in constructor and then can never be changed. * * To apply another alphabet to a list you'll have to create a new list. */ const Alphabet* alphabet_; protected: /** * @brief The list content. */ std::vector content_; public: /** * @brief Build a new void BasicSymbolList object with the specified alphabet. * * @param alpha The alphabet to use. */ BasicSymbolList(const Alphabet* alpha) : alphabet_(alpha), content_() {} /** * @brief Build a new BasicSymbolList object with the specified alphabet. * The content of the site is initialized from a vector of characters. * * @param list The content of the site. * @param alpha The alphabet to use. * @throw BadCharException If the content does not match the specified alphabet. 
*/ BasicSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadCharException); /** * @brief Build a new BasicSymbolList object with the specified alphabet. * The content of the site is initialized from a vector of integers. * * @param list The content of the site. * @param alpha The alphabet to use. * @throw BadIntException If the content does not match the specified alphabet. */ BasicSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadIntException); /** * @brief The generic copy constructor. */ BasicSymbolList(const SymbolList& list); /** * @brief The copy constructor. */ BasicSymbolList(const BasicSymbolList& list); /** * @brief The generic assignment operator. */ BasicSymbolList& operator=(const SymbolList& list); /** * @brief The assignment operator. */ BasicSymbolList& operator=(const BasicSymbolList& list); /** * @name The Clonable interface * * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else BasicSymbolList* #endif clone() const { return new BasicSymbolList(* this); } /** @} */ // Class destructor virtual ~BasicSymbolList() {} public: virtual const Alphabet* getAlphabet() const { return alphabet_; } virtual size_t size() const { return static_cast(content_.size()); } virtual const std::vector& getContent() const { return content_; } virtual void setContent(const std::vector& list) throw (BadIntException); virtual void setContent(const std::vector& list) throw (BadCharException); virtual std::string toString() const; virtual void addElement(const std::string& c) throw (BadCharException); virtual void addElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException); virtual void setElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException); virtual void deleteElement(size_t pos) throw (IndexOutOfBoundsException); virtual void deleteElements(size_t pos, size_t len) throw (IndexOutOfBoundsException); virtual std::string getChar(size_t pos) const throw 
(IndexOutOfBoundsException); virtual void addElement(int v) throw (BadIntException); virtual void addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException); virtual void setElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException); virtual int getValue(size_t pos) const throw (IndexOutOfBoundsException); virtual const int& operator[](size_t i) const { return content_[i]; } virtual int& operator[](size_t i) { return content_[i]; } virtual void shuffle() { random_shuffle(content_.begin(), content_.end()); } }; class SymbolListEditionEvent { private: SymbolList* list_; public: SymbolListEditionEvent(SymbolList* list): list_(list) {} SymbolListEditionEvent(const SymbolListEditionEvent& slee): list_(slee.list_) {} SymbolListEditionEvent& operator=(const SymbolListEditionEvent& slee) { list_ = slee.list_; return *this; } virtual ~SymbolListEditionEvent() {} public: virtual SymbolList* getSymbolList() { return list_; } virtual const SymbolList* getSymbolList() const { return list_; } }; class SymbolListInsertionEvent: public SymbolListEditionEvent { private: size_t pos_; size_t len_; public: SymbolListInsertionEvent(SymbolList* list, size_t pos, size_t len): SymbolListEditionEvent(list), pos_(pos), len_(len) {} public: virtual size_t getPosition() const { return pos_; } virtual size_t getLength() const { return len_; } }; class SymbolListDeletionEvent: public SymbolListEditionEvent { private: size_t pos_; size_t len_; public: SymbolListDeletionEvent(SymbolList* list, size_t pos, size_t len): SymbolListEditionEvent(list), pos_(pos), len_(len) {} public: virtual size_t getPosition() const { return pos_; } virtual size_t getLength() const { return len_; } }; class SymbolListSubstitutionEvent: public SymbolListEditionEvent { private: size_t begin_; size_t end_; public: SymbolListSubstitutionEvent(SymbolList* list, size_t begin, size_t end) : SymbolListEditionEvent(list), begin_(begin), end_(end) {} public: virtual size_t getBegin() 
const { return begin_; } virtual size_t getEnd() const { return end_; } }; class SymbolListListener : public virtual Clonable { public: virtual ~SymbolListListener() {} #ifndef NO_VIRTUAL_COV virtual SymbolListListener* clone() const = 0; #endif public: virtual bool isRemovable() const = 0; virtual bool isShared() const = 0; virtual void beforeSequenceChanged(const SymbolListEditionEvent& event) = 0; virtual void afterSequenceChanged(const SymbolListEditionEvent& event) = 0; virtual void beforeSequenceInserted(const SymbolListInsertionEvent& event) = 0; virtual void afterSequenceInserted(const SymbolListInsertionEvent& event) = 0; virtual void beforeSequenceDeleted(const SymbolListDeletionEvent& event) = 0; virtual void afterSequenceDeleted(const SymbolListDeletionEvent& event) = 0; virtual void beforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) = 0; virtual void afterSequenceSubstituted(const SymbolListSubstitutionEvent& event) = 0; }; /** * @brief A event-driven SymbolList object. * * This is a general purpose container, containing an ordered list of states(= letters). * The states that allowed to be present in the list are defined by an alphabet object, * which is passed to the list constructor by a pointer. * * For programming convenience, the states are stored as integers, but the translation toward * and from a char description is easily performed with the Alphabet classes. * * @see Alphabet */ class EdSymbolList: public virtual SymbolList { private: /** * @brief The Alphabet attribute must be initialized in constructor and then can never be changed. * * To apply another alphabet to a list you'll have to create a new list. */ const Alphabet* alphabet_; bool propagateEvents_; protected: /** * @brief The list content. */ std::vector content_; /** * @brief Contains the listeners. */ std::vector listeners_; public: /** * @brief Build a new void BasicSymbolList object with the specified alphabet. * * @param alpha The alphabet to use. 
*/ EdSymbolList(const Alphabet* alpha) : alphabet_(alpha), propagateEvents_(true), content_(), listeners_() {} /** * @brief Build a new BasicSymbolList object with the specified alphabet. * The content of the site is initialized from a vector of characters. * * @param list The content of the site. * @param alpha The alphabet to use. * @throw BadCharException If the content does not match the specified alphabet. */ EdSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadCharException); /** * @brief Build a new BasicSymbolList object with the specified alphabet. * The content of the site is initialized from a vector of integers. * * @param list The content of the site. * @param alpha The alphabet to use. * @throw BadIntException If the content does not match the specified alphabet. */ EdSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadIntException); /** * @brief The generic copy constructor. */ EdSymbolList(const SymbolList& list); /** * @brief The copy constructor. */ EdSymbolList(const EdSymbolList& list); /** * @brief The generic assignment operator. */ EdSymbolList& operator=(const SymbolList& list); /** * @brief The assignment operator. 
*/ EdSymbolList& operator=(const EdSymbolList& list); /** * @name The Clonable interface * * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else EdSymbolList* #endif clone() const { return new EdSymbolList(* this); } /** @} */ // Class destructor virtual ~EdSymbolList() { for (size_t i = 0; i < listeners_.size(); ++i) { if (listeners_[i] && !listeners_[i]->isShared()) { delete listeners_[i]; } } } public: virtual const Alphabet* getAlphabet() const { return alphabet_; } virtual size_t size() const { return static_cast(content_.size()); } virtual const std::vector& getContent() const { return content_; } virtual void setContent(const std::vector& list) throw (BadIntException); virtual void setContent(const std::vector& list) throw (BadCharException); virtual std::string toString() const; virtual void addElement(const std::string& c) throw (BadCharException); virtual void addElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException); virtual void setElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException); virtual void deleteElement(size_t pos) throw (IndexOutOfBoundsException); virtual void deleteElements(size_t pos, size_t len) throw (IndexOutOfBoundsException); virtual std::string getChar(size_t pos) const throw (IndexOutOfBoundsException); virtual void addElement(int v) throw (BadIntException); virtual void addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException); virtual void setElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException); virtual int getValue(size_t pos) const throw (IndexOutOfBoundsException); virtual const int& operator[](size_t i) const { return content_[i]; } virtual int& operator[](size_t i) { return content_[i]; } virtual void shuffle() { random_shuffle(content_.begin(), content_.end()); } /** * @name Events handling * * @{ */ virtual size_t getNumberOfListeners() const { return listeners_.size(); } virtual const SymbolListListener& 
getListener(size_t i) const { if (listeners_[i] == 0) std::cout << "aie!!!" << std::endl; return *listeners_[i]; } virtual SymbolListListener& getListener(size_t i) { if (listeners_[i] == 0) std::cout << "aie!!!" << std::endl; return *listeners_[i]; } virtual void addSymbolListListener(SymbolListListener* listener) { listeners_.push_back(listener); } virtual void removeSymbolListListener(SymbolListListener* listener) { if (listener->isRemovable()) listeners_.erase(remove(listeners_.begin(), listeners_.end(), listener), listeners_.end()); else throw Exception("EdSymbolList::removeSymbolListListener. Listener is not removable."); } protected: virtual void beforeSequenceChanged(const SymbolListEditionEvent& event) {}; virtual void afterSequenceChanged(const SymbolListEditionEvent& event) {}; virtual void beforeSequenceInserted(const SymbolListInsertionEvent& event) {}; virtual void afterSequenceInserted(const SymbolListInsertionEvent& event) {}; virtual void beforeSequenceDeleted(const SymbolListDeletionEvent& event) {}; virtual void afterSequenceDeleted(const SymbolListDeletionEvent& event) {}; virtual void beforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}; virtual void afterSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}; void fireBeforeSequenceChanged(const SymbolListEditionEvent& event) { beforeSequenceChanged(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->beforeSequenceChanged(event); } void fireAfterSequenceChanged(const SymbolListEditionEvent& event) { afterSequenceChanged(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->afterSequenceChanged(event); } void fireBeforeSequenceInserted(const SymbolListInsertionEvent& event) { beforeSequenceInserted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->beforeSequenceInserted(event); } void fireAfterSequenceInserted(const SymbolListInsertionEvent& event) 
{ afterSequenceInserted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->afterSequenceInserted(event); } void fireBeforeSequenceDeleted(const SymbolListDeletionEvent& event) { beforeSequenceDeleted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->beforeSequenceDeleted(event); } void fireAfterSequenceDeleted(const SymbolListDeletionEvent& event) { afterSequenceDeleted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->afterSequenceDeleted(event); } void fireBeforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) { beforeSequenceSubstituted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->beforeSequenceSubstituted(event); } void fireAfterSequenceSubstituted(const SymbolListSubstitutionEvent& event) { afterSequenceSubstituted(event); if (propagateEvents_) for (size_t i = 0; i < listeners_.size(); ++i) listeners_[i]->afterSequenceSubstituted(event); } /** @} */ protected: void propagateEvents(bool yn) { propagateEvents_ = yn; } bool propagateEvents() const { return propagateEvents_; } }; } //end of namespace bpp. #endif // _SYMBOLLIST_H_ bpp-seq-2.1.0/src/Bpp/Seq/SequenceTools.h000644 000000 000000 00000034425 12147656566 020073 0ustar00rootroot000000 000000 // // File: SequenceTools.h // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _SEQUENCETOOLS_H_
#define _SEQUENCETOOLS_H_

#include "Alphabet/Alphabet.h"
#include "Alphabet/DNA.h"
#include "Alphabet/RNA.h"
#include "Alphabet/RNY.h"
#include "Sequence.h"
#include "SymbolListTools.h"
#include "NucleicAcidsReplication.h"
// NOTE(review): the targets of the bare #include directives below are missing
// in this copy of the file. Restore them from the upstream sources.
#include
#include
#include

// From the STL:
#include
#include
#include
#include

namespace bpp
{
/**
 * @brief Bowker's homogeneity test results class.
 *
 * Stores the statistic and the p-value of a test. A default-constructed
 * object holds a statistic of 0 and a p-value of 1.
 */
class BowkerTest :
  public StatTest
{
private:
  double pvalue_;
  double stat_;

public:
  BowkerTest() : pvalue_(1.),
    stat_(0.) {}

  virtual ~BowkerTest() {}

  /** @return A heap-allocated copy of this object; the caller owns it. */
  BowkerTest* clone() const { return new BowkerTest(*this); }

public:
  /** @return The display name of the test. */
  std::string getName() const { return "Bowker's test for homogeneity."; }
  /** @return The stored test statistic. */
  double getStatistic() const { return stat_; }
  /** @return The stored p-value. */
  double getPValue() const { return pvalue_; }

  /** @param stat The test statistic to store. */
  void setStatistic(double stat) { stat_ = stat; }
  /** @param pvalue The p-value to store. */
  void setPValue(double pvalue) { pvalue_ = pvalue; }
};

/**
 * @brief SequenceTools static class
 *
 * Implements methods to manipulate sequences.
 */
class SequenceTools :
  public SymbolListTools
{
private:
  static DNA _DNA;
  static RNA _RNA;
  static RNY _RNY;
  static NucleicAcidsReplication _DNARep;
  static NucleicAcidsReplication _RNARep;
  static NucleicAcidsReplication _transc;

public:
  SequenceTools() {}
  virtual ~SequenceTools() {}

public:
  /**
   * @brief Get a sub-sequence.
   *
   * @param sequence The sequence to truncate.
   * @param begin The first position of the subsequence.
   * @param end   The last position of the subsequence.
   * @return A new sequence object with the given subsequence.
   * @throw IndexOutOfBoundsException, Exception In case of bad indices.
   */
  static Sequence* subseq(const Sequence& sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception);

  /**
   * @brief Concatenate two sequences.
   *
   * Sequences must have the same name and alphabets.
   * Only the comments of the first sequence are kept.
   *
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @return A new sequence object with the concatenation of the two sequences.
   * @throw AlphabetMismatchException If the two alphabets do not match.
   * @throw Exception If the sequence names do not match.
   */
  static Sequence* concatenate(const Sequence& seq1, const Sequence& seq2)
  throw (AlphabetMismatchException, Exception);

  /**
   * @brief Complement the nucleotide sequence in place.
   *
   * @param seq The sequence to be complemented.
   * @return A reference to the complemented sequence.
   * @throw AlphabetException If the sequence is not a nucleotide sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& complement(Sequence& seq) throw (AlphabetException);

  /**
   * @brief Get the complementary sequence of a nucleotide sequence.
   *
   * @see DNAReplication
   * @return A new sequence object with the complementary sequence.
   * @param sequence The sequence to complement.
   * @throw AlphabetException If the sequence is not a nucleotide sequence.
   */
  static Sequence* getComplement(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Get the transcription sequence of a DNA sequence.
   *
   * Translate a DNA sequence into an RNA sequence.
   *
   * @see DNAReplication
   * @return A new sequence object with the transcription sequence.
   * @param sequence The sequence to transcribe.
   * @throw AlphabetException If the sequence is not a DNA sequence.
   */
  static Sequence* transcript(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Get the reverse-transcription sequence of an RNA sequence.
   *
   * Translate an RNA sequence into a DNA sequence.
   *
   * @see DNAReplication
   * @return A new sequence object with the reverse-transcription sequence.
   * @param sequence The sequence to reverse-transcribe.
   * @throw AlphabetException If the sequence is not an RNA sequence.
   */
  static Sequence* reverseTranscript(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Invert a sequence from 5'->3' to 3'->5' and vice-versa.
   *
   * ABCDEF becomes FEDCBA, and the sense attribute is changed (may be
   * inhibited).
   *
   * @param seq The sequence to invert.
   * @return A reference to the sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& invert(Sequence& seq);

  /**
   * @brief Invert a sequence from 5'->3' to 3'->5' and vice-versa.
   *
   * ABCDEF becomes FEDCBA, and the sense attribute is changed (may be
   * inhibited).
   *
   * @param sequence The sequence to invert.
   * @return A new sequence object containing the inverted sequence.
   * @author Sylvain Gaillard
   */
  static Sequence* getInvert(const Sequence& sequence);

  /**
   * @brief Invert and complement a sequence.
   *
   * This method is more accurate than calling invert and complement
   * separately.
   *
   * @param seq The sequence to invert and complement.
   * @return A reference to the sequence.
   * @author Sylvain Gaillard
   */
  static Sequence& invertComplement(Sequence& seq);

  /**
   * @return The percent identity of the two sequences.
   * One match is counted if the two sequences have identical states.
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @param ignoreGaps If true, only positions without gaps will be used for the counting.
   * @throw AlphabetMismatchException If the two sequences do not have the same alphabet.
   * @throw SequenceNotAlignedException If the two sequences do not have the same length.
   */
  static double getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps = false) throw (AlphabetMismatchException, SequenceNotAlignedException);

  /**
   * @return The number of sites in the sequence, i.e. all positions without gaps.
   *
   * @param seq The sequence to analyse.
   */
  static size_t getNumberOfSites(const Sequence& seq);

  /**
   * @return The number of complete sites in the sequence, i.e. all positions without gaps and unresolved states (generic characters).
   *
   * @param seq The sequence to analyse.
   */
  static size_t getNumberOfCompleteSites(const Sequence& seq);

  /**
   * @return The number of unresolved sites in the sequence.
   *
   * @param seq The sequence to analyse.
   *
   * @author Sylvain Gaillard
   */
  static size_t getNumberOfUnresolvedSites(const Sequence& seq);

  /**
   * @brief Remove gaps from a sequence.
   *
   * The deleteElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   */
  static void removeGaps(Sequence& seq);

  /**
   * @brief Get a copy of the sequence without gaps.
   *
   * A whole new sequence will be created by adding all non-gap positions.
   * The original sequence will be cloned to serve as a template.
   *
   * @param seq The sequence to analyse.
   * @return A new sequence object without gaps.
   */
  static Sequence* getSequenceWithoutGaps(const Sequence& seq);

  /**
   * @brief Remove stops from a codon sequence.
   *
   * The deleteElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   * @throw Exception If the input sequence does not have a codon alphabet.
   */
  static void removeStops(Sequence& seq) throw (Exception);

  /**
   * @brief Get a copy of the codon sequence without stops.
   *
   * A whole new sequence will be created by adding all non-stop positions.
   * The original sequence will be cloned to serve as a template.
   *
   * @param seq The sequence to analyse.
   * @return A new sequence object without stops.
   * @throw Exception If the input sequence does not have a codon alphabet.
   */
  static Sequence* getSequenceWithoutStops(const Sequence& seq) throw (Exception);

  /**
   * @brief Replace stop codons by gaps.
   *
   * The setElement method of the Sequence object will be used where appropriate.
   * @param seq The sequence to analyse.
   * @throw Exception If the input sequence does not have a codon alphabet.
   */
  static void replaceStopsWithGaps(Sequence& seq) throw (Exception);

  /**
   * @brief Bowker's test for homogeneity.
   *
   * Computes the contingency table of occurrence of all pairs of states and
   * tests its symmetry using Bowker's (1948) test.
   *
   * Reference:
   * @code
   * Ababneh F. Bioinformatics 2006 22(10) 1225-1231
   * @endcode
   *
   * @param seq1 The first sequence.
   * @param seq2 The second sequence.
   * @return A BowkerTest object with the computed statistic and p-value (computed from a chi square distribution).
   * @throw SequenceNotAlignedException If the two sequences do not have the same length.
   */
  static BowkerTest* bowkerTest(const Sequence& seq1, const Sequence& seq2) throw (SequenceNotAlignedException);

  /**
   * @brief Get all putative haplotypes from a heterozygous sequence.
   *
   * @param seq The sequence to resolve
   * @param hap The vector to fill with the new sequences
   * @param level The maximum number of states that a generic char must code
   * (if this number is higher than level, the state will not be resolved).
   * For instance if level = 3 and Alphabet is DNA, all generic char will be
   * resolved but N.
   *
   * @author Sylvain Gaillard
   */
  // NOTE(review): the element type of the std::vector parameter is missing in
  // this copy of the file. Restore it from the upstream sources.
  static void getPutativeHaplotypes(const Sequence& seq, std::vector& hap, unsigned int level = 2);

  /**
   * @brief Combine two sequences.
   *
   * @author Sylvain Gaillard
   */
  static Sequence* combineSequences(const Sequence& s1, const Sequence& s2) throw (AlphabetMismatchException);

  /**
   * @brief Subtract a haplotype from a heterozygous sequence.
   *
   * Subtract a haplotype (i.e. a fully resolved sequence) from a heterozygous
   * sequence to get the other haplotype. The new haplotype could be an unresolved
   * sequence if unresolved characters in the sequence code for more than 2 states.
   *
   * For example:
   * @code
   * >heterozygous sequence
   * ATTCGGGKWTATRYRM
   * >haplotype
   * ATTCGGGTATATGCAA
   * >subtracted haplotype
   * ATTCGGGGTTATATGC
   * @endcode
   *
   * @param s The heterozygous sequence.
   * @param h The haplotype to subtract.
   * @param name The name of the new computed haplotype.
   * @param level The number of states from which the site is set to fully unresolved.
   * @throw SequenceNotAlignedException If s and h don't have the same size.
   *
   * @author Sylvain Gaillard
   */
  static Sequence* subtractHaplotype(const Sequence& s, const Sequence& h, std::string name = "", unsigned int level = 1) throw (SequenceNotAlignedException);

  /**
   * @brief Get the RNY decomposition of a DNA sequence; with a given
   * phase between 1 and 3, it gives the decomposition in this phase;
   * in phase 1, the first triplet is centered on the first character.
   * Without a phase the function gives the alternative succession in
   * phases 1, 2 and 3.
   *
   * @return A new sequence object with the RNY decomposition.
   * @param sequence The sequence to decompose.
   * @param ph The phase to use (1, 2 or 3).
   * @throw AlphabetException If the sequence is not a DNA sequence.
   *
   * @author Laurent Guéguen
   */
  static Sequence* RNYslice(const Sequence& sequence, int ph) throw (AlphabetException);
  static Sequence* RNYslice(const Sequence& sequence) throw (AlphabetException);

  /**
   * @brief Extract the CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.
   *
   * @param sequence The sequence to be reduced to its CDS part.
   * @param checkInit If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false.
   * @param checkStop If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false.
   * @param includeInit Tell if the initiator codon should be kept or removed. No effect if checkInit is false.
   * @param includeStop Tell if the stop codon should be kept or removed. No effect if checkStop is false.
   */
  static void getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);

  /**
   * @brief Find the position of a motif in a sequence
   *
   * @param seq The reference sequence
   * @param motif The motif to find
   * @param strict If true (default) find exactly the motif
   *               If false find a compatible match
   * @return The position of the first occurrence of the motif or the seq
   * length.
   */
  static size_t findFirstOf(const Sequence& seq, const Sequence& motif, bool strict = true);
};
} // end of namespace bpp.

#endif // _SEQUENCETOOLS_H_
bpp-seq-2.1.0/src/Bpp/Seq/SequencePositionIterators.cpp000644 000000 000000 00000012310 12147656566 023014 0ustar00rootroot000000 000000 //
// File: SequencePositionIterators.cpp
// Author: Sylvain Gaillard
// Created: 23/06/2009 11:38:27
//

/*
Copyright or © or Copr. Bio++ Development Team, (June 23, 2009)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SequencePositionIterators.h" using namespace bpp; using namespace std; // for the STL //================================= // AbstractSequencePositionIterator //================================= /******************************************************************************/ bool AbstractSequencePositionIterator::operator==(const SequencePositionIterator & it) const { return this->getPosition() == it.getPosition(); } /******************************************************************************/ bool AbstractSequencePositionIterator::operator!=(const SequencePositionIterator & it) const { return this->getPosition() != it.getPosition(); } /******************************************************************************/ void AbstractSequencePositionIterator::setPosition(unsigned int pos) { this->currentPosition_ = pos; } /******************************************************************************/ const Sequence & AbstractSequencePositionIterator::getSequence() const { return * (this->sequence_); } /******************************************************************************/ unsigned int AbstractSequencePositionIterator::getPosition() const { return this->currentPosition_; } /******************************************************************************/ int AbstractSequencePositionIterator::getValue() const { return this->sequence_->getValue(this->currentPosition_); } /******************************************************************************/ string AbstractSequencePositionIterator::getChar() const { return 
this->sequence_->getChar(this->currentPosition_); } //=============================== // SimpleSequencePositionIterator //=============================== /******************************************************************************/ SimpleSequencePositionIterator::SimpleSequencePositionIterator(const SequencePositionIterator& it): AbstractSequencePositionIterator(it.getSequence(), it.getPosition()) {}; /******************************************************************************/ SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator++() { this->setPosition(this->getPosition() + 1); return *this; } /******************************************************************************/ SimpleSequencePositionIterator SimpleSequencePositionIterator::operator++(int i) { SimpleSequencePositionIterator ans = * this; ++(* this); return ans; } /******************************************************************************/ SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator+=(int i) { this->setPosition(this->getPosition() + i); return * this; } /******************************************************************************/ SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator-=(int i) { return (* this) += -i; } /******************************************************************************/ SimpleSequencePositionIterator SimpleSequencePositionIterator::operator+(int i) const { SimpleSequencePositionIterator res(* this); res += i; return res; } /******************************************************************************/ SimpleSequencePositionIterator SimpleSequencePositionIterator::operator-(int i) const { return (* this) + (- i); } /******************************************************************************/ bool SimpleSequencePositionIterator::hasMorePositions() const { return (this->getPosition() < this->getSequence().size()); } 
/******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/StringSequenceTools.cpp000644 000000 000000 00000023277 12147656566 021620 0ustar00rootroot000000 000000 // // File: StringSequenceTools.cpp // Created by: Julien Dutheil // Created on: Sun Nov 30 11:29:07 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "StringSequenceTools.h" #include "Alphabet/AlphabetTools.h" #include "Alphabet/DNA.h" #include "Alphabet/RNA.h" #include "Alphabet/ProteicAlphabet.h" #include #include using namespace bpp; // From the STL: #include #include #include #include using namespace std; /****************************************************************************************/ string StringSequenceTools::subseq(const string& sequence, int begin, int end) throw (Exception) { // Checking interval if (end < begin) throw Exception ("StringSequenceTools::subseq: Invalid interval"); // Copy sequence string temp(sequence); // Truncate sequence temp.erase(temp.begin() + end + 1, temp.end()); temp.erase(temp.begin(), temp.begin() + begin); // Send result return temp; } /****************************************************************************************/ string StringSequenceTools::setToSizeR(const string& sequence, int size) { return TextTools::resizeRight(sequence, size, '-'); } string StringSequenceTools::setToSizeL(const string& sequence, int size) { return TextTools::resizeLeft(sequence, size, '-'); } /****************************************************************************************/ string StringSequenceTools::deleteChar(const string& sequence, char chars) { // Copy sequence string result(sequence); // Search and delete specified char for (unsigned int i = 0; i < result.size(); i++) { if (result[i] == chars) result.erase(result.begin() + i); } return result; } /****************************************************************************************/ string StringSequenceTools::deleteChar(const string& sequence, string chars) { // Copy sequence string result(sequence); // For all characters to delete for (unsigned int i = 0; i < chars.size(); i++) { // Search and delete char for (unsigned int j = 0; j < result.size(); j++) { if (result[j] == chars[i]) result.erase(result.begin() + j); } } return result; } 
/****************************************************************************************/ string* StringSequenceTools::reverse(const string& sequence) { // Initializing string* result = new string; // Main loop : reverse all characters of sequence size_t size = sequence.size(); for (size_t i = 0; i < size; i++) { *result += sequence[size - i - 1]; } // Send result return result; } /****************************************************************************************/ string* StringSequenceTools::complement(const string& sequence) { // Initializing string* result = new string; // Main loop : completement all characters size_t size = sequence.size(); for (unsigned int i = 0; i < size; i++) { switch (sequence[i]) { case 'A': *result += 'T'; break; case 'C': *result += 'G'; break; case 'G': *result += 'C'; break; case 'T': *result += 'A'; break; case 'M': *result += 'K'; break; case 'R': *result += 'Y'; break; case 'Y': *result += 'R'; break; case 'K': *result += 'M'; break; case 'V': *result += 'B'; break; case 'H': *result += 'D'; break; case 'D': *result += 'H'; break; case 'B': *result += 'V'; break; default: *result += sequence[i]; break; } } // Send new sequence return result; } /****************************************************************************************/ double StringSequenceTools::getGCcontent(const string& sequence, size_t pos, size_t window) throw (BadIntegerException, Exception) { // Frequency counts for nucleotids A, C, G, T map counts; // Window size checking if (window < sequence.size()) throw BadIntegerException("StringSequenceTools::getGCContent : specified window too high", static_cast(window)); // For last nucleotides if (pos + window > sequence.size()) { pos = sequence.size() - window; } // Main loop for (size_t i = pos; i < pos + window; i++) { switch (toupper(sequence[i])) { case 'A': counts['A'] += 1; break; case 'C': counts['C'] += 1; break; case 'G': counts['G'] += 1; break; case 'T': counts['T'] += 1; break; case 'M': 
counts['A'] += 0.5; counts['C'] += 0.5; break; case 'R': counts['A'] += 0.5; counts['G'] += 0.5; break; case 'W': counts['A'] += 0.5; counts['T'] += 0.5; break; case 'S': counts['C'] += 0.5; counts['G'] += 0.5; break; case 'Y': counts['C'] += 0.5; counts['T'] += 0.5; break; case 'K': counts['G'] += 0.5; counts['T'] += 0.5; break; case 'V': counts['A'] += 0.34; counts['C'] += 0.34; counts['G'] += 0.34; break; case 'H': counts['A'] += 0.34; counts['C'] += 0.34; counts['T'] += 0.34; break; case 'D': counts['A'] += 0.34; counts['G'] += 0.34; counts['T'] += 0.34; break; case 'B': counts['C'] += 0.34; counts['G'] += 0.34; counts['T'] += 0.34; break; case '-': throw Exception("StringSequenceTools::getGCContent : Gap found in sequence"); break; // Unresolved bases default: counts['A'] += 0.25; counts['C'] += 0.25; counts['G'] += 0.25; counts['T'] += 0.25; } } // Calculate and send GC rate return (counts['G'] + counts['C']) / static_cast(window); } /****************************************************************************************/ vector StringSequenceTools::codeSequence(const string& sequence, const Alphabet* alphabet) throw (BadCharException) { unsigned int size = AlphabetTools::getAlphabetCodingSize(alphabet); // Warning, an exception may be casted here! 
vector code((int)floor((double)sequence.size() / (double)size)); unsigned int pos = 0; unsigned int count = 0; while (pos + size <= sequence.size()) { code[count] = alphabet->charToInt(sequence.substr(pos, size)); count++; pos += size; } return code; } /****************************************************************************************/ string StringSequenceTools::decodeSequence(const vector& sequence, const Alphabet* alphabet) throw (BadIntException) { string result = ""; for (unsigned int i = 0; i < sequence.size(); i++) { result += alphabet->intToChar(sequence[i]); } return result; } /****************************************************************************************/ Alphabet* StringSequenceTools::getAlphabetFromSequence(const std::string& sequence) throw (EmptySequenceException, SequenceException, AlphabetException) { // empty sequence test if (sequence.size() == 0) { throw EmptySequenceException("Sequence::getAlphabetFromSequence : Empty sequence string"); } // initialisation bool p = false; // indicates that a protein specific character is found bool r = false; // indicates that a RNA specific character is found bool u = false; // indicates that an unknown character is found bool pd = false; // Protein or DNA (T) // Main loop : for all character in sequence for (unsigned int i = 0; i < sequence.size(); i++) { // Character analyse switch (AlphabetTools::getType(sequence[i])) { case 0: u = true; break; case 3: p = true; break; case 2: r = true; break; case 5: pd = true; break; } } if (u) throw AlphabetException ("Sequence::getAlphabetFromSequence : Unknow character detected in specified sequence"); if (r && pd) throw SequenceException ("Sequence::getAlphabetFromSequence : Both 'T' and 'U' in the same sequence!"); if (r && p) throw SequenceException ("Sequence::getAlphabetFromSequence : Protein character and 'U' in the same sequence!"); if (p) return new ProteicAlphabet(); if (r) return new RNA(); return new DNA(); } 
/****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SiteTools.h000644 000000 000000 00000021026 12147656566 017220 0ustar00rootroot000000 000000 // // File SiteTools.h // Author: Julien Dutheil // Guillaume Deuchst // Last modification : Friday August 8 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
 */

#ifndef _SITETOOLS_H_
#define _SITETOOLS_H_

#include "SymbolListTools.h"
#include "Site.h"
// NOTE(review): the targets of the bare #include directives below are missing
// in this copy of the file. Restore them from the upstream sources.
#include

// From the STL:
#include

namespace bpp
{
/**
 * @brief Utility methods dealing with sites.
 */
class SiteTools :
  public SymbolListTools
{
public:
  SiteTools() {}
  virtual ~SiteTools() {}

public:
  /**
   * @param site A site.
   * @return True if the site contains one or several gap(s).
   */
  static bool hasGap(const Site& site);

  /**
   * @param site A site.
   * @return True if the site contains only gaps.
   */
  static bool isGapOnly(const Site& site);

  /**
   * @param site A site.
   * @return True if the site contains only gaps or unresolved characters
   * (wording inferred from the method name, the original text was a copy of
   * isGapOnly -- TODO confirm against the implementation).
   */
  static bool isGapOrUnresolvedOnly(const Site& site);

  /**
   * @param site A site.
   * @return True if the site contains one or several unknown characters.
   */
  static bool hasUnknown(const Site& site);

  /**
   * @param site A site.
   * @return True if the site contains a Stop Codon, when the alphabet is a CodonAlphabet.
   */
  static bool hasStopCodon(const Site& site);

  /**
   * @param site A site.
   * @return True if the site contains no gap and no unknown characters.
   */
  static bool isComplete(const Site& site);

  /**
   * @brief Tell if a site is constant, that is displaying the same state in all sequences that do not present a gap.
   *
   * @param site A site.
   * @param ignoreUnknown If true, positions with unknown characters will be ignored.
   * Otherwise, a site with one single state + any uncertain state will not be considered as constant.
   * @param unresolvedRaisesException In case of ambiguous case (gap only site for instance), throw an exception. Otherwise returns false.
   * @return True if the site is made of only one state.
   * @throw EmptySiteException If the site has size 0 or if the site cannot be resolved (for instance is made of gaps only) and unresolvedRaisesException is set to true.
   */
  static bool isConstant(const Site& site, bool ignoreUnknown = false, bool unresolvedRaisesException = true) throw (EmptySiteException);

  /**
   * @param site1 The first site.
   * @param site2 The second site.
   * @return True if the two sites have the same content (and, of course, alphabet).
   */
  static bool areSitesIdentical(const Site& site1, const Site& site2);

  /**
   * @brief Compute the Shannon entropy index of a site.
   *
   * \f[
   * I = - \sum_x f_x\cdot \ln(f_x)
   * \f]
   * where \f$f_x\f$ is the frequency of state \f$x\f$.
   *
   * @author J. Dutheil
   * @param site A site.
   * @param resolveUnknowns Tell if unknown characters must be resolved.
   * @return The Shannon entropy index of this site.
   * @throw EmptySiteException If the site has size 0.
   */
  static double variabilityShannon(const Site& site, bool resolveUnknowns) throw (EmptySiteException);

  /**
   * @brief Compute the factorial diversity index of a site.
   *
   * \f[
   * F = \frac{\log\left(\left(\sum_x p_x\right)!\right)}{\sum_x \log(p_x)!}
   * \f]
   * where \f$p_x\f$ is the number of times state \f$x\f$ is observed in the site.
   *
   * @author J. Dutheil
   * @param site A site.
   * @return The factorial diversity index of this site.
   * @throw EmptySiteException If the site has size 0.
   */
  static double variabilityFactorial(const Site& site) throw (EmptySiteException);

  /**
   * @brief Compute the mutual information between two sites.
   *
   * \f[
   * MI = \sum_x \sum_y p_{x,y}\ln\left(\frac{p_{x,y}}{p_x \cdot p_y}\right)
   * \f]
   * where \f$p_x\f$ and \f$p_y\f$ are the frequencies of states \f$x\f$ and \f$y\f$, and
   * \f$p_{x,y}\f$ is the frequency of the pair \f$(x,y)\f$.
   *
   * @author J. Dutheil
   * @param site1 First site
   * @param site2 Second site
   * @param resolveUnknowns Tell if unknown characters must be resolved.
   * @return The mutual information for the pair of sites.
   * @throw DimensionException If the sites do not have the same length.
   * @throw EmptySiteException If the sites have size 0.
   */
  static double mutualInformation(const Site& site1, const Site& site2, bool resolveUnknowns) throw (DimensionException,EmptySiteException);

  /**
   * @brief Compute the entropy of a site. This is an alias of method variabilityShannon.
   *
   * \f[
   * I = - \sum_x f_x\cdot \ln(f_x)
   * \f]
   * where \f$f_x\f$ is the frequency of state \f$x\f$.
   *
   * @author J. Dutheil
   * @param site A site.
   * @param resolveUnknowns Tell if unknown characters must be resolved.
   * @return The Shannon entropy index of this site.
   * @throw EmptySiteException If the site has size 0.
   */
  static double entropy(const Site& site, bool resolveUnknowns) throw (EmptySiteException) {
    // Plain forwarding: both arguments are passed through unchanged.
    return variabilityShannon(site, resolveUnknowns);
  }

  /**
   * @brief Compute the joint entropy between two sites.
   *
   * \f[
   * H_{i,j} = - \sum_x \sum_y p_{x,y}\ln\left(p_{x,y}\right)
   * \f]
   * where \f$p_{x,y}\f$ is the frequency of the pair \f$(x,y)\f$.
   *
   * @author J. Dutheil
   * @param site1 First site
   * @param site2 Second site
   * @param resolveUnknowns Tell if unknown characters must be resolved.
   * @return The joint entropy for the pair of sites.
   * @throw DimensionException If the sites do not have the same length.
   * @throw EmptySiteException If the sites have size 0.
   */
  static double jointEntropy(const Site& site1, const Site& site2, bool resolveUnknowns) throw (DimensionException,EmptySiteException);

  /**
   * @brief Compute the heterozygosity index of a site.
   *
   * \f[
   * H = 1 - \sum_x f_x^2
   * \f]
   * where \f$f_x\f$ is the frequency of state \f$x\f$.
   *
   * @param site A site.
   * @return The heterozygosity index of this site.
   * @throw EmptySiteException If the site has size 0.
   */
  static double heterozygosity(const Site& site) throw (EmptySiteException);

  /**
   * @brief Give the number of distinct characters at a site.
   *
   * @param site a Site
   * @return The number of distinct characters in the given site.
   * @throw EmptySiteException Declared in the signature; presumably raised for a site of size 0, as in the other methods -- TODO confirm.
   */
  static size_t getNumberOfDistinctCharacters(const Site& site) throw (EmptySiteException);

  /**
   * @brief Tell if a site has singletons
   *
   * @param site a Site.
   * @return True if the site has singletons.
   * @throw EmptySiteException Declared in the signature; presumably raised for a site of size 0, as in the other methods -- TODO confirm.
   */
  static bool hasSingleton(const Site& site) throw (EmptySiteException);

  /**
   * @brief Tell if a site is a parsimony informative site.
   *
   * At least two distinct characters must be present.
   *
   * @param site a Site.
   * @return True if the site is parsimony informative.
   * @throw EmptySiteException Declared in the signature; presumably raised for a site of size 0, as in the other methods -- TODO confirm.
   */
  static bool isParsimonyInformativeSite(const Site& site) throw (EmptySiteException);

  /**
   * @brief Tell if a site has more than 2 distinct characters
   *
   * @param site a Site.
   * @return True if the site has more than 2 distinct characters
   * @throw EmptySiteException Declared in the signature; presumably raised for a site of size 0, as in the other methods -- TODO confirm.
   */
  static bool isTriplet(const Site& site) throw (EmptySiteException);
};
} // end of namespace bpp.

#endif // _SITETOOLS_H_
bpp-seq-2.1.0/src/Bpp/Seq/SequenceWalker.cpp000644 000000 000000 00000006614 12147656566 020552 0ustar00rootroot000000 000000 //
// File: SequenceWalker.cpp
// Created by: Julien Dutheil
// Created on: Thu Nov 24 2011
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2011)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

#include
using namespace std;

#include "SequenceWalker.h"
#include
using namespace bpp;

/**
 * @brief Move the cursor to a sequence (gap-free) coordinate and return the
 * matching alignment column.
 *
 * The walker keeps a cursor made of two members, alnPos_ and seqPos_, and
 * moves it one alignment column at a time; seqPos_ is only changed when a
 * non-gap column is stepped onto.
 *
 * @param seqPos The requested sequence position.
 * @return alnPos_ once seqPos_ equals seqPos.
 * @throw Exception If the requested position cannot be reached. The cursor
 * members are modified while walking, so after an exception they hold the last
 * position reached, not the position before the call.
 */
size_t SequenceWalker::getAlignmentPosition(size_t seqPos) throw (Exception)
{
  // Cursor is already on the requested position.
  if (seqPos == seqPos_) return alnPos_;
  if (seqPos > seqPos_) {
    //Move forward
    while (alnPos_ < seq_->size() && seqPos_ < seqPos) {
      // On the last column there is nothing left to step onto.
      if (alnPos_ == seq_->size() - 1)
        throw Exception("SequenceWalker::getAlignmentPosition(). Forward1. Position out of bound.");
      ++alnPos_;
      if ((*seq_)[alnPos_] != gap_) {
        ++seqPos_;
      }
    }
    // Reached when the loop was skipped or ended without matching (e.g. empty sequence).
    if (seqPos_ != seqPos)
      throw Exception("SequenceWalker::getAlignmentPosition(). Forward2. Position out of bound (" + TextTools::toString(alnPos_) + ")");
  } else {
    //Move backward
    if (alnPos_ == 0)
      throw Exception("SequenceWalker::getAlignmentPosition(). Backward. Position out of bound.");
    while (alnPos_ > 0 && seqPos_ > seqPos) {
      --alnPos_;
      // Here the column just stepped ONTO is tested (compare with
      // getSequencePosition, which tests the column just left).
      if ((*seq_)[alnPos_] != gap_) {
        --seqPos_;
      }
    }
    if (seqPos_ != seqPos)
      throw Exception("SequenceWalker::getAlignmentPosition(). Position out of bound.");
  }
  return alnPos_;
}

/**
 * @brief Move the cursor to an alignment column and return the matching
 * sequence (gap-free) coordinate.
 *
 * @param alnPos The requested alignment column.
 * @return seqPos_ once alnPos_ equals alnPos, or 0 as soon as seqPos_ is 0
 * while walking backward.
 * @throw Exception If alnPos is not smaller than the sequence size (this check
 * is performed after the "already there" shortcut).
 */
size_t SequenceWalker::getSequencePosition(size_t alnPos) throw (Exception)
{
  if (alnPos == alnPos_) return seqPos_;
  if (alnPos >= seq_->size())
    throw Exception("SequenceWalker::getSequencePosition(). Position out of bound.");
  if (alnPos > alnPos_) {
    //Move forward
    while (alnPos_ < alnPos) {
      ++alnPos_;
      if ((*seq_)[alnPos_] != gap_) {
        ++seqPos_;
      }
    }
  } else {
    //Move backward
    while (alnPos_ > alnPos) {
      // Early exit: alnPos_ is left where it is, it is NOT moved to alnPos.
      if (seqPos_ == 0) return 0;
      --alnPos_;
      // alnPos_ + 1 is the column that was just left: seqPos_ drops only
      // when a non-gap column is left behind.
      if ((*seq_)[alnPos_ + 1] != gap_) {
        --seqPos_;
      }
    }
  }
  return seqPos_;
}
bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithAnnotation.cpp000644 000000 000000 00000024015 12147656566 022266 0ustar00rootroot000000 000000 // // File: SequenceWithAnnotation.cpp // Created by: Julien Dutheil // Created on: Mon Jul 19 2010 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

#include "SequenceWithAnnotation.h" // class's header file
#include "Alphabet/AlphabetTools.h"
#include "StringSequenceTools.h"
#include
using namespace bpp;

// From the STL:
#include
using namespace std;

// NOTE(review): in this copy of the file the targets of the bare include
// directives and all template arguments (e.g. of std::vector) are missing;
// as a result several overloads below look textually identical. Restore them
// from the upstream sources before compiling.

/* Constructors: **************************************************************/

// Empty sequence: only the alphabet is set, name and comments are default-constructed.
SequenceWithAnnotation::SequenceWithAnnotation(const Alphabet* alpha):
  EdSymbolList(alpha),
  name_(),
  comments_()
{}

// Named sequence built from a character string; the content is only parsed
// (through setContent) when the string is not empty.
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::string& sequence, const Alphabet* alpha)
throw (BadCharException) :
  EdSymbolList(alpha),
  name_(name),
  comments_()
{
  if (sequence!="")
    setContent(sequence);
}

// Same as above, with comments attached.
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha)
throw (BadCharException) :
  EdSymbolList(alpha),
  name_(name),
  comments_(comments)
{
  if (sequence != "")
    setContent(sequence);
}

// Named sequence built from a vector; the vector is handed to the
// EdSymbolList constructor. The BadCharException specification suggests the
// character-coded overload (element type lost in this copy - confirm).
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Alphabet* alpha)
throw (BadCharException) :
  EdSymbolList(sequence, alpha),
  name_(name),
  comments_()
{}

// Same as above, with comments attached.
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha)
throw (BadCharException) :
  EdSymbolList(sequence, alpha),
  name_(name),
  comments_(comments)
{}

// Named sequence built from a vector; the BadIntException specification
// suggests the integer-coded overload (element type lost in this copy - confirm).
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Alphabet* alpha)
throw (BadIntException) :
  EdSymbolList(sequence, alpha),
  name_(name),
  comments_()
{}

// Same as above, with comments attached.
SequenceWithAnnotation::SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha)
throw (BadIntException) :
  EdSymbolList(sequence, alpha),
  name_(name),
  comments_(comments)
{}

/* Copy constructors:
*********************************************************/

// Build from any Sequence: symbols through EdSymbolList, name and comments
// through the accessors of s.
SequenceWithAnnotation::SequenceWithAnnotation(const Sequence& s) :
  EdSymbolList(s),
  name_(s.getName()),
  comments_(s.getComments())
{}

// Regular copy constructor; same member-wise scheme as above.
SequenceWithAnnotation::SequenceWithAnnotation(const SequenceWithAnnotation& s) :
  EdSymbolList(s),
  name_(s.getName()),
  comments_(s.getComments())
{}

/* Assignation operator: ******************************************************/

// Assign from any Sequence. There is no self-assignment guard in this function.
SequenceWithAnnotation& SequenceWithAnnotation::operator=(const Sequence& s)
{
  EdSymbolList::operator=(s);
  name_ = s.getName();
  comments_ = s.getComments();
  return *this;
}

// Regular copy assignment; same scheme as above.
SequenceWithAnnotation& SequenceWithAnnotation::operator=(const SequenceWithAnnotation& s)
{
  EdSymbolList::operator=(s);
  name_ = s.getName();
  comments_ = s.getComments();
  return *this;
}

/******************************************************************************/

/**
 * Replace the whole content by the coded form of a character string.
 * White spaces are removed from the input before it is coded.
 * The "before changed" event is fired before the string is coded, so if the
 * coding throws, no matching "after changed" event is fired.
 */
void SequenceWithAnnotation::setContent(const std::string& sequence) throw (BadCharException)
{
  SymbolListEditionEvent event(this);
  fireBeforeSequenceChanged(event);

  // Remove blanks in sequence
  content_ = StringSequenceTools::codeSequence(TextTools::removeWhiteSpaces(sequence), getAlphabet());
  //Warning, an exception may be thrown here!

  fireAfterSequenceChanged(event);
}

/******************************************************************************/

/**
 * Resize the sequence to newSize, working on the right end:
 * truncate the tail when shrinking, append gap codes when growing.
 * Nothing is done (and no event is fired) when the size already matches.
 */
void SequenceWithAnnotation::setToSizeR(size_t newSize)
{
  // Size verification
  size_t seqSize = content_.size();
  if (newSize == seqSize) return;

  if (newSize < seqSize)
  {
    // Deletion event: starts at newSize, spans the removed tail.
    SymbolListDeletionEvent event(this, newSize, seqSize - newSize);
    fireBeforeSequenceDeleted(event);
    content_.resize(newSize);
    fireAfterSequenceDeleted(event);
    return;
  }

  // Add gaps up to specified size
  SymbolListInsertionEvent event(this, seqSize, newSize - seqSize);
  fireBeforeSequenceInserted(event);
  int gap = getAlphabet()->getGapCharacterCode();
  while (content_.size() < newSize) content_.push_back(gap);
  fireAfterSequenceInserted(event);
}

/******************************************************************************/

/**
 * Resize the sequence to newSize, working on the left end:
 * drop leading symbols when shrinking, prepend gap codes when growing.
 * Nothing is done (and no event is fired) when the size already matches.
 */
void SequenceWithAnnotation::setToSizeL(size_t newSize)
{
  // Size verification
  size_t seqSize = content_.size();
  if (newSize == seqSize) return;

  if (newSize < seqSize)
  {
    //We must truncate sequence from the left.
    SymbolListDeletionEvent event(this, 0, seqSize - newSize);
    fireBeforeSequenceDeleted(event);
    content_.erase(content_.begin(), content_.begin() + (seqSize - newSize));
    fireAfterSequenceDeleted(event);
    return;
  }

  // Add gaps up to specified size
  SymbolListInsertionEvent event(this, 0, newSize - seqSize);
  fireBeforeSequenceInserted(event);
  int gap = getAlphabet()->getGapCharacterCode();
  content_.insert(content_.begin(), newSize - seqSize, gap);
  fireAfterSequenceInserted(event);
}

/******************************************************************************/

/**
 * Append integer-coded symbols at the end of the sequence.
 * All symbols are validated before any of them is stored, so the content is
 * unchanged when BadIntException is thrown.
 * NOTE(review): the "before inserted" event is fired before validation, so a
 * rejected input produces a "before" event without a matching "after" event.
 */
void SequenceWithAnnotation::append(const std::vector& content) throw (BadIntException)
{
  SymbolListInsertionEvent event(this, content_.size(), content.size());
  fireBeforeSequenceInserted(event);
  // Check list for incorrect characters
  for (unsigned int i = 0; i < content.size(); i++)
    if(!getAlphabet()->isIntInAlphabet(content[i]))
      throw BadIntException(content[i], "SequenceWithAnnotation::append", getAlphabet());
  //SequenceWithAnnotation is valid:
  for (unsigned int i = 0; i < content.size(); i++)
    content_.push_back(content[i]);

  fireAfterSequenceInserted(event);
}

/**
 * Append character-coded symbols at the end of the sequence; each one is
 * converted with charToInt after the whole input has been validated.
 * Same event ordering remark as for the integer overload.
 */
void SequenceWithAnnotation::append(const std::vector& content) throw (BadCharException)
{
  SymbolListInsertionEvent event(this, content_.size(), content.size());
  fireBeforeSequenceInserted(event);

  // Check list for incorrect characters
  for (unsigned int i = 0; i < content.size(); i++)
    if(!getAlphabet()->isCharInAlphabet(content[i]))
      throw BadCharException(content[i], "SequenceWithAnnotation::append", getAlphabet());

  //SequenceWithAnnotation is valid:
  for (unsigned int i = 0; i < content.size(); i++)
    content_.push_back(getAlphabet()->charToInt(content[i]));

  fireAfterSequenceInserted(event);
}

/**
 * Append a character string: the string is coded with
 * StringSequenceTools::codeSequence and the result is forwarded to another
 * append overload.
 */
void SequenceWithAnnotation::append(const std::string& content) throw (BadCharException)
{
  append(StringSequenceTools::codeSequence(content, getAlphabet()));
}

/******************************************************************************/

/**
 * Collect the type of every registered listener that is a SequenceAnnotation
 * (listeners for which the dynamic_cast yields null are skipped).
 */
vector SequenceWithAnnotation::getAnnotationTypes() const
{
  vector types;
  for (unsigned int i = 0; i < getNumberOfListeners(); ++i) {
    const SequenceAnnotation* anno = dynamic_cast(&getListener(i));
    if (anno)
      types.push_back(anno->getType());
  }
  return types;
}

/******************************************************************************/

/**
 * Concatenate another annotated sequence at the end of this one and merge the
 * annotations of both.
 *
 * Steps, in order:
 *  1. check that both alphabets report the same type and that both sequences
 *     carry the same name;
 *  2. append the content of swa with event propagation switched off;
 *  3. for each annotation type of this sequence, merge with the annotation of
 *     the same type in swa if there is one, otherwise merge with a clone of
 *     our own annotation re-initialised on swa;
 *  4. for each annotation type found only in swa, clone it, initialise the
 *     clone on this sequence, merge the annotation of swa into it and register
 *     it with addAnnotation.
 *
 * @throw AlphabetMismatchException If the alphabet types differ.
 * @throw Exception If the names differ.
 */
void SequenceWithAnnotation::merge(const SequenceWithAnnotation& swa)
  throw (AlphabetMismatchException, Exception)
{
  // Sequence's alphabets matching verification
  if ((swa.getAlphabet()->getAlphabetType()) != (getAlphabet()->getAlphabetType()))
    throw AlphabetMismatchException("SequenceWithAnnotation::merge: Sequence's alphabets don't match ", swa.getAlphabet(), getAlphabet());

  // Sequence's names matching verification
  if (swa.getName() != getName())
    throw Exception ("SequenceWithAnnotation::merge: Sequence's names don't match");

  // Concatenate sequences and send result
  // NOTE(review): propagation is not switched back on if append throws.
  propagateEvents(false);
  append(swa.getContent());
  propagateEvents(true);

  // Try to merge annotations.
  //First start with annotations in this sequence:
  vector types = getAnnotationTypes();
  vector newTypes = swa.getAnnotationTypes();
  for (unsigned int i = 0; i < types.size(); ++i) {
    vector::iterator it = find(newTypes.begin(), newTypes.end(), types[i]);
    if (it != newTypes.end()) {
      //Merge annotations:
      getAnnotation(types[i]).merge(swa.getAnnotation(types[i]));
      //Remove annotation from the list:
      newTypes.erase(it);
    } else {
      //Extend annotation to the right:
      // The temporary clone is owned by the auto_ptr and released at the end of this scope.
      auto_ptr anno(getAnnotation(types[i]).clone());
      anno->init(swa);
      getAnnotation(types[i]).merge(*anno);
    }
  }
  //Now look for annotations in the input sequence:
  for (unsigned int i = 0; i < newTypes.size(); ++i) {
    //Extend annotation from the left:
    // NOTE(review): raw pointer; if init() or merge() throws the clone is not
    // deleted. Ownership is presumably taken over by addAnnotation - confirm.
    SequenceAnnotation* anno = swa.getAnnotation(newTypes[i]).clone();
    anno->init(*this);
    anno->merge(swa.getAnnotation(newTypes[i]));
    addAnnotation(anno);
  }
}

/******************************************************************************/
bpp-seq-2.1.0/src/Bpp/Seq/SequenceWalker.h000644 000000 000000 00000005556 12147656566 020223 0ustar00rootroot000000 000000 // // File: SequenceWalker.h // Created by: Julien Dutheil // Created on: Thu Nov 24 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2011) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEWALKER_H_ #define _SEQUENCEWALKER_H_ #include "Sequence.h" namespace bpp { /** * @brief A helper class to easily convert coordinates between sequence and alignments. * * Coordinates are 0-based. * The walker will be more efficient if coordinates are called in pre-ordered way. */ class SequenceWalker { private: const Sequence* seq_; size_t seqPos_, alnPos_; int gap_; public: SequenceWalker(const Sequence& seq): seq_(&seq), seqPos_(0), alnPos_(0), gap_(seq.getAlphabet()->getGapCharacterCode()) { if (seq_->size() > 0) { while ((*seq_)[alnPos_] == gap_) ++alnPos_; } } SequenceWalker(const SequenceWalker& walker): seq_(walker.seq_), seqPos_(walker.seqPos_), alnPos_(walker.alnPos_), gap_(walker.gap_) {} SequenceWalker& operator=(const SequenceWalker& walker) { seq_ = walker.seq_; seqPos_ = walker.seqPos_; alnPos_ = walker.alnPos_; gap_ = walker.gap_; return *this; } virtual ~SequenceWalker() {} public: size_t getAlignmentPosition(size_t seqPos) throw (Exception); size_t getSequencePosition(size_t alnPos) throw (Exception); }; } //end of namespace bpp. 
#endif //_SEQUENCEWALKER_H_ bpp-seq-2.1.0/src/Bpp/Seq/SymbolListTools.h000644 000000 000000 00000020363 12147656566 020420 0ustar00rootroot000000 000000 // // File: SymbolListTools.h // Created by: Julien Dutheil // Created on: Wed Apr 9 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/

#ifndef _SYMBOLLISTTOOLS_H_
#define _SYMBOLLISTTOOLS_H_

#include "SymbolList.h"
#include "Alphabet/AlphabetExceptions.h"
#include

// From the STL:
#include

namespace bpp
{

/**
 * @brief Utility functions dealing with both sites and sequences.
 *
 * Every method declared here is static.
 *
 * NOTE(review): in this copy of the file the targets of the bare include
 * directives and most template arguments (std::map, std::vector) are missing;
 * restore them from the upstream sources before compiling.
 */
class SymbolListTools
{
  public:
    SymbolListTools() {}
    virtual ~SymbolListTools() {}

  public:

    /**
     * @brief Count all states in the list.
     *
     * The result is accumulated into the map passed by reference: one is
     * added to the entry of each symbol met, nothing is returned.
     *
     * @author J. Dutheil
     * @param list The list.
     * @param counts The output map to store the counts (existing counts will be incremented).
     */
    static void getCounts(const SymbolList& list, std::map& counts)
    {
      for(std::vector::const_iterator seqit = list.getContent().begin(); seqit != list.getContent().end(); seqit++)
        counts[*seqit]++;
    }

    /**
     * @brief Count all pair of states for two lists of the same size.
     *
     * NB: The two lists do not need to share the same alphabet!
     * The states of the first list will be used as the first index in the output,
     * and the ones from the second list as the second index.
     *
     * @author J. Dutheil
     * @param list1 The first list.
     * @param list2 The second list.
     * @param counts The output map to store the counts (existing counts will be incremented).
     * @throw DimensionException If the two lists do not have the same size.
     */
    static void getCounts(const SymbolList& list1, const SymbolList& list2, std::map >& counts) throw (DimensionException)
    {
      if(list1.size() != list2.size()) throw DimensionException("SymbolListTools::getCounts: the two sites must have the same size.", list1.size(), list2.size());
      // Positions are paired by index: entry [a][b] counts the columns where
      // list1 holds a and list2 holds b.
      for(size_t i = 0; i < list1.size(); i++)
        counts[list1[i]][list2[i]]++;
    }

    /**
     * @brief Count all states in the list, optionally resolving unknown characters.
     *
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     *
     * The function returns nothing; the result is written to the counts argument.
     *
     * @author J. Dutheil
     * @param list The list.
     * @param counts The output map to store the counts (existing counts will be incremented).
     * @param resolveUnknowns Tell if unknown characters must be resolved.
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     */
    static void getCounts(const SymbolList& list, std::map& counts, bool resolveUnknowns);

    /**
     * @brief Count all pair of states for two lists of the same size, optionally resolving unknown characters.
     *
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     *
     * NB: The two lists do not need to share the same alphabet!
     * The states of the first list will be used as the first index in the output,
     * and the ones from the second list as the second index.
     *
     * The function returns nothing; the result is written to the counts argument.
     *
     * @author J. Dutheil
     * @param list1 The first list.
     * @param list2 The second list.
     * @param counts The output map to store the counts (existing counts will be incremented).
     * @param resolveUnknowns Tell if unknown characters must be resolved.
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     * @throw DimensionException Declared in the signature; presumably when the
     * two lists differ in size, as in the inline overload (confirm in the implementation).
     */
    static void getCounts(const SymbolList& list1, const SymbolList& list2, std::map< int, std::map >& counts, bool resolveUnknowns) throw (DimensionException);

    /**
     * @brief Get all states frequencies in the list.
     *
     * @author J. Dutheil
     * @param list The list.
     * @param resolveUnknowns Tell if unknown characters must be resolved.
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     * @param frequencies The output map with all states and corresponding frequencies. Existing frequencies will be erased if any.
     */
    static void getFrequencies(const SymbolList& list, std::map& frequencies, bool resolveUnknowns = false);

    /**
     * @brief Get all state pairs frequencies for two lists of the same size.
     *
     * @author J. Dutheil
     * @param list1 The first list.
     * @param list2 The second list.
     * @param resolveUnknowns Tell if unknown characters must be resolved.
     * For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
     * @param frequencies The output map with all state pairs and corresponding frequencies. Existing frequencies will be erased if any.
     * @throw DimensionException Declared in the signature; presumably when the
     * two lists differ in size (confirm in the implementation).
     */
    static void getFrequencies(const SymbolList& list1, const SymbolList& list2, std::map >& frequencies, bool resolveUnknowns = false) throw (DimensionException);

    /**
     * @brief Get the GC content of a symbol list.
     *
     * @param list The list.
     * @param ignoreUnresolved Do not count unresolved states. Otherwise, weight by each state probability in case of ambiguity (e.g. the R state counts for 0.5).
     * @param ignoreGap Do not count gaps in total.
     * @return The proportion of G and C states in the list.
     * @throw AlphabetException If the list is not made of nucleotide states.
     */
    static double getGCContent(const SymbolList& list, bool ignoreUnresolved = true, bool ignoreGap = true) throw (AlphabetException);

    /**
     * @brief Get the number of distinct positions.
     *
     * The comparison is achieved from position 0 to the minimum size of the two vectors.
     *
     * @param l1 SymbolList 1.
     * @param l2 SymbolList 2.
     * @return The number of distinct positions.
     * @throw AlphabetMismatchException if the two lists have not the same alphabet type.
     */
    static size_t getNumberOfDistinctPositions(const SymbolList& l1, const SymbolList& l2) throw (AlphabetMismatchException);

    /**
     * @brief Get the number of positions without gap.
     *
     * The comparison is achieved from position 0 to the minimum size of the two vectors.
     *
     * @param l1 SymbolList 1.
     * @param l2 SymbolList 2.
     * @return The number of positions without gap.
     * @throw AlphabetMismatchException if the two lists have not the same alphabet type.
     */
    static size_t getNumberOfPositionsWithoutGap(const SymbolList& l1, const SymbolList& l2) throw (AlphabetMismatchException);

    /**
     * @brief Change all gap elements to unknown characters.
     *
     * @param l The input list of characters; modified in place (passed by
     * non-const reference, nothing is returned).
     */
    static void changeGapsToUnknownCharacters(SymbolList& l);

    /**
     * @brief Change all unknown characters to gap elements.
     *
     * @param l The input list of characters; modified in place (passed by
     * non-const reference, nothing is returned).
     */
    static void changeUnresolvedCharactersToGaps(SymbolList& l);

};

} //end of namespace bpp.
#endif // _SYMBOLLISTTOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/ProteicAlphabet.h000644 000000 000000 00000010700 12147656566 022056 0ustar00rootroot000000 000000 // // File: ProteicAlphabet.h // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/

#ifndef _PROTEICALPHABET_H_
#define _PROTEICALPHABET_H_

#include "LetterAlphabet.h"
#include "ProteicAlphabetState.h"

namespace bpp
{

/**
 * @brief This alphabet is used to deal with proteins.
 *
 * It supports all 20 amino-acids with their standard denomination.
 * Gaps are coded by '-', unresolved characters are coded by 'X'.
 *
 * As coded in this header: getSize() is 20, getNumberOfTypes() is 23, the
 * unknown character code is 22 and any code above 19 is reported as
 * unresolved.
 *
 * NOTE(review): the target types of the dynamic_cast expressions and the
 * element types of std::vector are missing in this copy of the file; restore
 * them from the upstream sources before compiling.
 */
class ProteicAlphabet:
  public LetterAlphabet
{
    /**
     * @name Overloaded methods from AbstractAlphabet
     *
     * Each accessor forwards to the AbstractAlphabet method of the same name
     * and casts the result.
     * @{
     */
  public:
    const ProteicAlphabetState& getState(const std::string& letter) const
      throw (BadCharException) {
        return dynamic_cast(
            AbstractAlphabet::getState(letter)
            );
      }
    const ProteicAlphabetState& getState(int num) const
      throw (BadIntException) {
        return dynamic_cast(
            AbstractAlphabet::getState(num)
            );
      }
  protected:
    const ProteicAlphabetState& getStateAt(unsigned int pos) const
      throw (IndexOutOfBoundsException) {
        return dynamic_cast(
            AbstractAlphabet::getStateAt(pos)
            );
      }
    ProteicAlphabetState& getStateAt(unsigned int pos)
      throw (IndexOutOfBoundsException) {
        return dynamic_cast(
            AbstractAlphabet::getStateAt(pos)
            );
      }

    /** @} */
  public:
    ProteicAlphabet();
    virtual ~ProteicAlphabet() {}

  public:
    // Fixed constants of the alphabet (see the class description).
    unsigned int getSize() const { return 20; }
    unsigned int getNumberOfTypes() const { return 23; }
    int getUnknownCharacterCode() const { return 22; }

    // Alias / generic lookups; defined in the implementation file.
    std::vector getAlias(int state) const throw (BadIntException);
    std::vector getAlias(const std::string& state) const throw (BadCharException);
    int getGeneric(const std::vector& states) const throw (BadIntException);
    std::string getGeneric(const std::vector& states) const throw (BadCharException);

    // A state is unresolved when its integer code is greater than 19; the
    // string overload converts with charToInt first.
    bool isUnresolved(int state) const { return state > 19; }
    bool isUnresolved(const std::string& state) const { return charToInt(state) > 19; }
    std::string getAlphabetType() const { return "Proteic alphabet"; }

  public:

    /**
     * @name Specific methods
     *
     * @{
     */

    /**
     * @brief Get the abbreviation (3 letter code) for a state coded as char.
     *
     * @param aa Char description of the amino-acid to analyse.
     * @return The abbreviation of the corresponding state.
     */
    std::string getAbbr(const std::string & aa) const throw (AlphabetException);

    /**
     * @brief Get the abbreviation (3 letter code) for a state coded as int.
     *
     * @param aa Int description of the amino-acid to analyse.
     * @return The abbreviation of the corresponding state.
     */
    std::string getAbbr(int aa) const throw (AlphabetException);
    /** @} */

};

} //end of namespace bpp.

#endif // _PROTEICALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp000644 000000 000000 00000017421 12147656566 022420 0ustar00rootroot000000 000000 // // File: ProteicAlphabet.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "ProteicAlphabet.h" #include "ProteicAlphabetState.h" #include #include using namespace bpp; using namespace std; // From STL: #include /******************************************************************************/ ProteicAlphabet::ProteicAlphabet() { // Alphabet size definition resize(28); // Alphabet content definition setState( 0, ProteicAlphabetState(-1, "-", "GAP", "Gap")); setState( 1, ProteicAlphabetState( 0, "A", "ALA", "Alanine")); setState( 2, ProteicAlphabetState( 1, "R", "ARG", "Arginine")); setState( 3, ProteicAlphabetState( 2, "N", "ASN", "Asparagine")); setState( 4, ProteicAlphabetState( 3, "D", "ASP", "Asparatic Acid")); setState( 5, ProteicAlphabetState( 4, "C", "CYS", "Cysteine")); setState( 6, ProteicAlphabetState( 5, "Q", "GLN", "Glutamine")); setState( 7, ProteicAlphabetState( 6, "E", "GLU", "Glutamic acid")); setState( 8, ProteicAlphabetState( 7, "G", "GLY", "Glycine")); setState( 9, ProteicAlphabetState( 8, "H", "HIS", "Histidine")); setState(10, ProteicAlphabetState( 9, "I", "ILE", "Isoleucine")); setState(11, ProteicAlphabetState(10, "L", "LEU", "Leucine")); setState(12, ProteicAlphabetState(11, "K", "LYS", "Lysine")); setState(13, ProteicAlphabetState(12, "M", "MET", "Methionine")); setState(14, ProteicAlphabetState(13, "F", "PHE", "Phenylalanine")); setState(15, ProteicAlphabetState(14, "P", "PRO", "Proline")); setState(16, ProteicAlphabetState(15, "S", "SER", "Serine")); setState(17, ProteicAlphabetState(16, "T", "THR", "Threonine")); setState(18, ProteicAlphabetState(17, "W", "TRP", "Tryptophan")); setState(19, 
ProteicAlphabetState(18, "Y", "TYR", "Tyrosine")); setState(20, ProteicAlphabetState(19, "V", "VAL", "Valine")); setState(21, ProteicAlphabetState(20, "B", "B", "N or D")); setState(22, ProteicAlphabetState(21, "Z", "Z", "Q or E")); setState(23, ProteicAlphabetState(22, "X", "X", "Unresolved amino acid")); setState(24, ProteicAlphabetState(22, "O", "O", "Unresolved amino acid")); setState(25, ProteicAlphabetState(22, "0", "0", "Unresolved amino acid")); setState(26, ProteicAlphabetState(22, "?", "?", "Unresolved amino acid")); setState(27, ProteicAlphabetState(-2, "*", "STOP", "Stop")); } /******************************************************************************/ string ProteicAlphabet::getAbbr(const string& aa) const throw (AlphabetException) { string AA = TextTools::toUpper(aa); return (getState(aa).getAbbreviation()); } /******************************************************************************/ string ProteicAlphabet::getAbbr(int aa) const throw (AlphabetException) { return (getState(aa).getAbbreviation()); } /******************************************************************************/ vector ProteicAlphabet::getAlias(int state) const throw (BadIntException) { if(!isIntInAlphabet(state)) throw BadIntException(state, "ProteicAlphabet::getAlias(int): Specified base unknown."); vector v; if(state == 20) {// N or D v.resize(2); v[0] = 2; v[1] = 3; } else if(state == 21) {// Q or E v.resize(2); v[0] = 5; v[1] = 6; } else if(state == 22) {// all! 
v.resize(20); for(unsigned int i = 0; i < 20; i++) v[i] = i; } else { v.resize(1); v[0] = state; } return v; } /******************************************************************************/ vector ProteicAlphabet::getAlias(const string & state) const throw (BadCharException) { string locstate = TextTools::toUpper(state); if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "ProteicAlphabet::getAlias(int): Specified base unknown."); vector v; if(locstate == "B") {// N or D v.resize(2); v[0] = "N"; v[1] = "D"; } else if(locstate == "Z") {// Q or E v.resize(2); v[0] = "Q"; v[1] = "E"; } else if(locstate == "X" || locstate == "O" || locstate == "0" || locstate == "?") {// all! v.resize(20); for(unsigned int i = 0; i < 20; i++) v[i] = getState(i).getLetter(); } else { v.resize(1); v[0] = locstate; } return v; } /******************************************************************************/ int ProteicAlphabet::getGeneric(const vector & states) const throw (BadIntException) { map m; for (unsigned int i = 0 ; i < states.size() ; ++i) { vector tmp_s = this->getAlias(states[i]); // get the states for generic characters for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) { m[tmp_s[j]] ++; // add each state to the list } } vector ve = MapTools::getKeys(m); string key; for (unsigned int i = 0 ; i < ve.size() ; ++i) { if (!isIntInAlphabet(ve[i])) throw BadIntException(ve[i], "ProteicAlphabet::getGeneric(const vector): Specified base unknown."); key += "_" + TextTools::toString(ve[i]); } map g; g["_2_3"] = 20; g["_5_6"] = 21; int v; map::iterator it = g.find(key); if (ve.size() == 1) { v = ve[0]; } else if (it != g.end()) { v = it->second; } else { v = 22; } return v; } /******************************************************************************/ string ProteicAlphabet::getGeneric(const vector & states) const throw (BadCharException) { map m; for (unsigned int i = 0 ; i < states.size() ; ++i) { vector tmp_s = this->getAlias(states[i]); // get the states for 
generic characters for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) { m[tmp_s[j]] ++; // add each state to the list } } vector ve = MapTools::getKeys(m); string key; for (unsigned int i = 0 ; i < ve.size() ; ++i) { if (!isCharInAlphabet(ve[i])) throw BadCharException(ve[i], "ProteicAlphabet::getAlias(const vector): Specified base unknown."); key += TextTools::toString(ve[i]); } map g; g["DN"] = "B"; g["EQ"] = "Z"; string v; map::iterator it = g.find(key); if (ve.size() == 1) { v = ve[0]; } else if (it != g.end()) { v = it->second; } else { v = "?"; } return v; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AlphabetExceptions.h000644 000000 000000 00000014223 12147656566 022576 0ustar00rootroot000000 000000 // // File: AlphabetExceptions.h // Created by: Julien Dutheil // Created on: Mon Nov 3 16:41:53 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALPHABETEXCEPTIONS_H_ #define _ALPHABETEXCEPTIONS_H_ #include // From the STL: #include namespace bpp { class Alphabet; /** * @brief The alphabet exception base class. * * @see Alphabet, Exception */ class AlphabetException: public Exception { private: const Alphabet* alphabet_; public: /** * @brief Build a new AlphabetException object. * * @param text A message to be passed to the exception hierarchy. * @param alpha A const pointer toward the alphabet that threw the exception. */ AlphabetException(const std::string& text, const Alphabet* alpha = 0); AlphabetException(const AlphabetException& ae): Exception(ae), alphabet_(ae.alphabet_) {} AlphabetException& operator=(const AlphabetException& ae) { Exception::operator=(ae); alphabet_ = ae.alphabet_; return *this; } virtual ~AlphabetException() throw () {} public: /** * @brief Get the alphabet that threw the exception. * * @return a const pointer toward the alphabet. */ virtual const Alphabet* getAlphabet() const { return alphabet_; } }; /** * @brief An alphabet exception thrown when trying to specify a bad char to the alphabet. 
*/ class BadCharException: public AlphabetException { protected: std::string c_; public: /** * @brief Build a new BadCharException. * * @param badChar The faulty character. * @param text A message to be passed to the exception hierarchy. * @param alpha A const pointer toward the alphabet that threw the exception. */ BadCharException(const std::string & badChar, const std::string & text = "", const Alphabet * alpha = 0); virtual ~BadCharException() throw() {}; public: /** * @brief Get the character that threw the exception. * * @return the faulty character. */ virtual std::string getBadChar() const; }; /** * @brief An alphabet exception thrown when trying to specify a bad int to the alphabet. */ class BadIntException: public AlphabetException { protected: int i_; public: /** * @brief Build a new BadIntException. * @param badInt The faulty integer. * @param text A message to be passed to the exception hierarchy. * @param alpha A const pointer toward the alphabet that threw the exception. */ BadIntException(int badInt, const std::string& text = "", const Alphabet* alpha = 0); virtual ~BadIntException() throw() {} public: /** * @brief Get the integer that threw the exception. * * @return the faulty integer. */ virtual int getBadInt() const; }; /** * @brief Exception thrown when two alphabets do not match. * * Typically, this may occur when you try to add a bad sequence to a container, * or concatenate two kinds of sequences, and so on. */ class AlphabetMismatchException : public Exception { private: const Alphabet* alphabet1_, * alphabet2_; public: /** * @brief Build a new AlphabetMismatchException object. * * @param text A message to be passed to the exception hierarchy. * @param alpha1 A const pointer toward the first alphabet. * @param alpha2 A const pointer toward the second alphabet, i.e. the one which does not match with the first. 
*/ AlphabetMismatchException(const std::string& text = "", const Alphabet* alpha1 = 0, const Alphabet* alpha2 = 0); AlphabetMismatchException(const AlphabetMismatchException& ame): Exception(ame), alphabet1_(ame.alphabet1_), alphabet2_(ame.alphabet2_) {} AlphabetMismatchException& operator=(const AlphabetMismatchException& ame) { Exception::operator=(ame); alphabet1_ = ame.alphabet1_; alphabet2_ = ame.alphabet2_; return *this; } virtual ~AlphabetMismatchException() throw() {} public: /** * @brief Get the alphabets that do not match. * * @return a vector of pointers toward the alphabets. */ std::vector getAlphabets() const; }; /** * @brief Exception thrown in case no character is available for a certain state in an alphabet. */ class CharStateNotSupportedException : public AlphabetException { public: /** * @brief Build a new CharStateNotSupportedException. * * @param text A message to be passed to the exception hierarchy. * @param alpha A const pointer toward the alphabet that threw the exception. */ CharStateNotSupportedException(const std::string & text = "", const Alphabet * alpha = 0); virtual ~CharStateNotSupportedException() throw() {}; }; } //end of namespace bpp. #endif //_ALPHABETEXCEPTIONS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AlphabetState.h000644 000000 000000 00000007462 12147656566 021544 0ustar00rootroot000000 000000 // // File: AlphabetState.h // Author: Sylvain Gaillard // Created: 29/07/2009 13:56:01 // /* Copyright or © or Copr. Bio++ Development Team, (July 29, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALPHABETSTATE_H_ #define _ALPHABETSTATE_H_ #include // From the STL #include namespace bpp { /** * @brief This is the base class to describe states in an Alphabet. * * @author Sylvain Gaillard */ class AlphabetState: public virtual Clonable { private: int num_; std::string letter_; std::string name_; public: AlphabetState(int num, const std::string& letter, const std::string& name): num_(num), letter_(letter), name_(name) {} // Class destructor virtual ~AlphabetState() {} public: /** * @name The Clonable interface. * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else AlphabetState* #endif clone() const { return new AlphabetState(* this); } /** @} */ /** * @brief Get the state's number. * * @return The state's number (i.e. -1 for gap (-)). */ int getNum() const { return num_; } /** * @brief Set the state's number. * * @param num The state's number. 
*/ void setNum(int num) { num_ = num; } /** * @brief Get the letter(s) corresponding to the state. * * The letter is a string because it may more than one char * (for instance: codon). * * @return The state's letter. */ const std::string& getLetter() const { return letter_; } /** * @brief Set the letter(s) of the state. * * @param letter The state's letter. */ void setLetter(const std::string& letter) { letter_ = letter; } /** * @brief Get the name of the state. * * @return The full name of the state (i.e. Adenine). */ const std::string& getName() const { return name_; } /** * @brief Set the name of the state. * * @param name The state's name */ void setName(const std::string& name) { name_ = name; } /** * @brief operator == * * Comparison is done on state num */ bool operator == (AlphabetState& l2) { return getNum() == l2.getNum(); } }; } #endif // _ALPHABETSTATE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp000644 000000 000000 00000004111 12147656566 022227 0ustar00rootroot000000 000000 // // File: BinaryAlphabet.cpp // Authors: Laurent Gueguen // Created on: 2009 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BinaryAlphabet.h" #include "AlphabetState.h" // From Utils: #include using namespace bpp; BinaryAlphabet::BinaryAlphabet() { // Alphabet size definition resize(3); // Alphabet content definition setState(0, AlphabetState(-1, "-", "Gap")); for (unsigned int i = 0; i < 2; i++) { setState(i + 1, AlphabetState(i, TextTools::toString(i), "")); } } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/BinaryAlphabet.h000644 000000 000000 00000005022 12147656566 021676 0ustar00rootroot000000 000000 // // File: BinaryAlphabet.h // Author: L Gueguen // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BINARYALPHABET_H_ #define _BINARYALPHABET_H_ #include "AbstractAlphabet.h" namespace bpp { /** * @brief The BinaryAlphabet class, letters are 0 and 1. * @author Laurent Gueguen * */ class BinaryAlphabet : public AbstractAlphabet { protected: void registerState(const AlphabetState& st) { AbstractAlphabet::registerState(*(st.clone())); } public: // class constructor BinaryAlphabet(); // class destructor virtual ~BinaryAlphabet() {} public: unsigned int getSize() const { return 2; } unsigned int getNumberOfTypes() const { return 2; } std::string getAlphabetType() const { return "Binary alphabet"; } int getUnknownCharacterCode() const { return 3; } bool isUnresolved(int state) const { return state == 3; } bool isUnresolved(const std::string& state) const { return false; } }; } // end of namespace bpp. 
#endif // _BINARYALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/RNA.h000644 000000 000000 00000005443 12147656566 017440 0ustar00rootroot000000 000000 // // File: RNA.h // Created by: Guillaume Deuchst // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _RNA_H_ #define _RNA_H_ #include "NucleicAlphabet.h" //From the STL: #include namespace bpp { /** * @brief This alphabet is used to deal with RNA sequences. 
* * It supports all 4 nucleotides (A, U, G and C) with their standard denomination. * Gaps are coded by '-', unresolved characters are coded by 'X, N, O, 0 or ?'. * Extensive support for generic characters (e.g. 'P', 'Y', etc.) is provided. */ class RNA: public NucleicAlphabet { public: /** * @param exclamationMarkCountsAsGap If yes, '!' characters are replaced by gaps. * Otherwise, they are counted as unknown characters. */ RNA(bool exclamationMarkCountsAsGap = false); virtual ~RNA() {} public: std::vector getAlias(int state) const throw (BadIntException); std::vector getAlias(const std::string & state) const throw (BadCharException); int getGeneric(const std::vector & states) const throw (BadIntException); std::string getGeneric(const std::vector & states) const throw (BadCharException); std::string getAlphabetType() const { return "RNA alphabet"; } }; } //end of namespace bpp. #endif // _RNA_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/RNY.cpp000644 000000 000000 00000025002 12147656566 020014 0ustar00rootroot000000 000000 // // File: RNY.cpp // Created by: Laurent Gueguen // Created on: Tue Jul 31 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "RNY.h" // class's header file // From Utils: #include #include using namespace std; using namespace bpp; /****************************************************************************************/ RNY::RNY(const NucleicAlphabet& na) : nuclalph_(na) { // Alphabet size definition resize(351); int i,j,k,l; for (i = 0; i < 351; i++) { setState(i,AlphabetState(i,"","")); } // Alphabet content definition // / changer pour ARN string s1 = "RCT-"; string s2 = "AGCT-"; string s3 = "AGY-"; string s = " "; // NNN (0->35) for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { l = i * 12 + j * 3 + k; s[0] = s1[i]; s[1] = s2[j]; s[2] = s3[k]; setState(l, AlphabetState(l,s,s)); } } } // NN- (50->83) for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { l = 50 + 12 * i + j * 3; s[0] = s1[i]; s[1] = s2[j]; s[2] = s3[3]; setState(l, AlphabetState(l,s,s)); } } // N-N (100->126) for (i = 0; i < 3; i++) { for (k = 0; k < 3; k++) { l = 100 + 12 * i + k; s[0] = s1[i]; s[1] = s2[4]; s[2] = s3[k]; setState(l, AlphabetState(l,s,s)); } } // N-- (150->152) for (i = 0; i < 3; i++) { l = 150 + 12 * i; s[0] = s1[i]; s[1] = s2[4]; s[2] = s3[3]; setState(l, AlphabetState(l,s,s)); } // 
-NN (200->211) for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { l = 200 + j * 3 + k; s[0] = s1[3]; s[1] = s2[j]; s[2] = s3[k]; setState(l, AlphabetState(l,s,s)); } } // -N- (250->253) for (j = 0; j < 4; j++) { l = 250 + 3 * j; s[0] = s1[3]; s[1] = s2[j]; s[2] = s3[3]; setState(l, AlphabetState(l,s,s)); } // --N (300->302) for (k = 0; k < 3; k++) { l = 300 + k; s[0] = s1[3]; s[1] = s2[4]; s[2] = s3[k]; setState(l, AlphabetState(l,s,s)); } // --- (350) l = 350; s[0] = s1[3]; s[1] = s2[4]; s[2] = s3[3]; setState(l, AlphabetState(l,s,s)); } /****************************************************************************************/ vector RNY::getAlias(int state) const throw (BadIntException) { if (!isIntInAlphabet(state)) throw BadIntException(state, "RNY::getAlias(int): Specified base unknown."); vector v; int qs = state / 50; int rs = state % 50; int i,j,k; switch (qs) { case 0: // NNN v.resize(1); v[0] = rs; break; case 1: // NN- v.resize(3); for (k = 0; k < 3; k++) { v[k] = k + rs; } break; case 2: // N-N v.resize(4); for (j = 0; j < 4; j++) { v[j] = 3 * j + rs; } break; case 3: // N-- v.resize(12); for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { v[3 * j + k] = rs + 3 * j + k; } } break; case 4: // -NN v.resize(3); for (i = 0; i < 3; i++) { v[i] = 12 * i + rs; } break; case 5: // -N- v.resize(9); for (i = 0; i < 3; i++) { for (k = 0; k < 3; k++) { v[3 * i + k] = rs + 12 * i + k; } } break; case 6: // --N v.resize(12); for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { v[4 * i + j] = rs + 12 * i + 3 * j; } } break; case 7: // --- v.resize(36); for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { v[12 * i + 3 * j + k] = 12 * i + 3 * j + k; } } } break; } return v; } const NucleicAlphabet& RNY::getLetterAlphabet() const { return nuclalph_; } /****************************************************************************************/ vector RNY::getAlias(const string& state) const throw (BadCharException) { if (!isCharInAlphabet(state)) 
throw BadCharException(state, "RNY::getAlias(int): Specified base unknown."); vector v = getAlias(charToInt(state)); vector s; size_t size = v.size(); s.resize(size); for (size_t i = 0; i < size; i++) { s[i] = AbstractAlphabet::intToChar(v[i]); } return s; } /****************************************************************************************/ string RNY::getRNY(const string& pos1, const string& pos2, const string& pos3) const throw (BadCharException) { string tr; if (pos1 == "A" || pos1 == "G") tr = "R"; else tr = pos1; tr += pos2; if (pos3 == "T" || pos3 == "C") tr += "Y"; else tr += pos3; // teste triplet; charToInt(tr); return tr; } /**************************************************************************************/ int RNY::getRNY(int i, int j, int k, const Alphabet& alph) const throw (BadCharException) { if (alph.getAlphabetType() != "DNA alphabet") { throw AlphabetException ("RNY::getRNY : Sequence must be DNA", &alph); } char li = alph.intToChar(i)[0]; char lj = alph.intToChar(j)[0]; char lk = alph.intToChar(k)[0]; int r = 0; int s = 0; switch (li) { case 'A': case 'G': r += 0; break; case 'C': r += 1; break; case 'T': r += 2; break; case '-': case 'N': s += 1; break; default: throw BadCharException(&li, "RNY::getRNY(int,int;int,alph): Specified base unknown."); } r *= 4; s *= 2; switch (lj) { case 'A': r += 0; break; case 'G': r += 1; break; case 'C': r += 2; break; case 'T': r += 3; break; case '-': case 'N': s += 1; break; default: throw BadCharException(&lj, "RNY::getRNY(int,int;int,alph): Specified base unknown."); } r *= 3; s *= 2; switch (lk) { case 'A': r += 0; break; case 'G': r += 1; break; case 'C': case 'T': r += 2; break; case '-': case 'N': s += 1; break; default: throw BadCharException(&lk, "RNY::getRNY(int,int;int,alph): Specified base unknown."); } return 50 * s + r; } /****************************************************************************************/ bool RNY::isGap(int state) const { return state==350; } bool 
RNY::containsGap(const string& state) const throw (BadCharException) { return (state.find("-")!=string::npos); } bool RNY::isUnresolved(const string& state) const { return containsGap(state); } bool RNY::isUnresolved(int state) const { return (state>=50 && state!=350); } /****************************************************************************************/ int RNY::charToInt(const string& state) const throw (BadCharException) { if (state.size() != 3) throw BadCharException(state, "RNY::charToInt", this); else return AbstractAlphabet::charToInt(state); } /************************************************************/ string RNY::intToChar(int state) const throw (BadIntException) { unsigned int i,j,k,l; for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { l = i * 12 + j * 3 + k; if (getState(l).getNum() == state) return getState(l).getLetter(); } } } // NN- (50->83) for (i = 0; i < 3; i++) { for (j = 0; j < 4; j++) { l = 50 + 12 * i + j * 3; if (getState(l).getNum() == state) return getState(l).getLetter(); } } // N-N (100->126) for (i = 0; i < 3; i++) { for (k = 0; k < 3; k++) { l = 100 + 12 * i + k; if (getState(l).getNum() == state) return getState(l).getLetter(); } } // N-- (150->152) for (i = 0; i < 3; i++) { l = 150 + 12 * i; if (getState(l).getNum() == state) return getState(l).getLetter(); } // -NN (200->211) for (j = 0; j < 4; j++) { for (k = 0; k < 3; k++) { l = 200 + j * 3 + k; if (getState(l).getNum() == state) return getState(l).getLetter(); } } // -N- (250->253) for (j = 0; j < 4; j++) { l = 250 + 3 * j; if (getState(l).getNum() == state) return getState(l).getLetter(); } // --N (300->302) for (k = 0; k < 3; k++) { l = 300 + k; if (getState(l).getNum() == state) return getState(l).getLetter(); } // --- (350) l = 350; if (getState(l).getNum() == state) return getState(l).getLetter(); throw BadIntException(state, "RNY::intToChar: Specified base unknown", this); return "XXX"; } 
bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp000644 000000 000000 00000004761 12147656566 027150 0ustar00rootroot000000 000000
//
// File: InvertebrateMitochondrialCodonAlphabet.cpp
// Authors: Eric Bazin
//          Sylvain Gaillard
// Created on: thu mar 1 14:25:09 CET 2005
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for sequences analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/ #include "InvertebrateMitochondrialCodonAlphabet.h" using namespace bpp; using namespace std; InvertebrateMitochondrialCodonAlphabet::InvertebrateMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) : CodonAlphabet(alpha) { string A = alpha->intToChar(0); string G = alpha->intToChar(2); string T = alpha->intToChar(3); vector vstop; vstop.push_back(T + A + A); vstop.push_back(T + A + G); int istop; unsigned int j; for (unsigned int i = 0 ; i < vstop.size() ; i++) { istop = charToInt(vstop[i]); stopCodons_.push_back(istop); j = 0; while (j < getNumberOfChars()) { if (getStateAt(j).getNum() == istop) { getStateAt(j).setName(STOP); break; } j++; } } initCodon_ = charToInt(A + T + G); } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h000644 000000 000000 00000004714 12147656566 026236 0ustar00rootroot000000 000000 // // File: EchinodermMitochondrialCodonAlphabet.h // Created by: Eric Bazin // Created on: 14 11:31:27 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ECHINODERMMITOCHONDRIALCODONALPHABET_H_ #define _ECHINODERMMITOCHONDRIALCODONALPHABET_H_ #include "CodonAlphabet.h" namespace bpp { /** * @brief This class implements the Echinoderm and Faltworms * Mitochondrial codon alphabet as describe on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG9 * @author Laurent Guéguen */ class EchinodermMitochondrialCodonAlphabet : public CodonAlphabet { public: EchinodermMitochondrialCodonAlphabet(const NucleicAlphabet * alpha); virtual ~EchinodermMitochondrialCodonAlphabet() {}; public: std::string getAlphabetType() const { return "Codon alphabet: EchinodermMitochondrialCodonAlphabet(" + vAbsAlph_[0]->getAlphabetType() + ")"; } }; } //end of namespace bpp. #endif //_ECHINODERMMITOCHONDRIALCODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp000644 000000 000000 00000004702 12147656566 023534 0ustar00rootroot000000 000000 // // File: StandardCodonAlphabet.h // Authors: Julien Dutheil // Sylvain Gaillard // Created on: Sun Oct 12 17:51:36 2003 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
/**
 * @brief Build the standard codon alphabet on top of a nucleic alphabet.
 *
 * Three codons (T+A+A, T+A+G and T+G+A) are registered as stop codons: their
 * int codes are appended to stopCodons_ and the matching state is renamed
 * STOP. The initiation codon is set to A+T+G.
 *
 * @param alpha The nucleic alphabet the codons are made of. The letters bound
 *              to A, G and T are read from its int codes 0, 2 and 3.
 */
StandardCodonAlphabet::StandardCodonAlphabet(const NucleicAlphabet* alpha) :
  CodonAlphabet(alpha)
{
  const string A = alpha->intToChar(0);
  const string G = alpha->intToChar(2);
  const string T = alpha->intToChar(3);

  vector<string> vstop;
  vstop.push_back(T + A + A);
  vstop.push_back(T + A + G);
  vstop.push_back(T + G + A);

  for (size_t i = 0; i < vstop.size(); ++i)
  {
    const int istop = charToInt(vstop[i]);
    stopCodons_.push_back(istop);

    // Rename the first state carrying this int code; there is nothing left
    // to do for this codon once it has been found.
    for (unsigned int j = 0; j < getNumberOfChars(); ++j)
    {
      if (getStateAt(j).getNum() == istop)
      {
        getStateAt(j).setName(STOP);
        break;
      }
    }
  }

  initCodon_ = charToInt(A + T + G);
}
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "WordAlphabet.h" #include using namespace bpp; // From the STL: #include using namespace std; WordAlphabet::WordAlphabet(const vector& vAlpha) : AbstractAlphabet(), vAbsAlph_(vAlpha) { build_(); } WordAlphabet::WordAlphabet(const Alphabet* pAlpha, unsigned int num) : AbstractAlphabet(), vAbsAlph_(0) { for (unsigned int i = 0; i < num; i++) { vAbsAlph_.push_back(pAlpha); } build_(); } void WordAlphabet::build_() { unsigned int size = 1; for (unsigned int i = 0; i < vAbsAlph_.size(); i++) { size *= vAbsAlph_[i]->getSize(); } resize(size + 2); string s = ""; for (unsigned int i = 0; i < vAbsAlph_.size(); i++) { s += "-"; } setState(0, AlphabetState(-1, s, "gap")); for (unsigned int i = 0; i < size; i++) { setState(i + 1, AlphabetState(i, "", "")); } unsigned lr = size; char c; for (unsigned int na = 0; na < vAbsAlph_.size(); na++) { lr /= vAbsAlph_[na]->getSize(); unsigned int j = 1; unsigned int i = 0; while (j <= size) { c = vAbsAlph_[na]->intToChar(i)[0]; for (unsigned int k = 0; k < lr; k++) { getStateAt(j).setLetter(getStateAt(j).getLetter() + c); j++; // alphabet[j++].letter += c; } if (++i == vAbsAlph_[na]->getSize()) i = 0; } } s = ""; for (unsigned i = 0; i < 
vAbsAlph_.size(); i++) { s += "N"; } setState(size + 1, AlphabetState(size, s, "Unresolved")); remap(); } /******************************************************************************/ std::string WordAlphabet::getAlphabetType() const { string s = "Word alphabet:"; for (unsigned int i = 0; i < vAbsAlph_.size(); i++) { s += " " + vAbsAlph_[i]->getAlphabetType(); } return s; } bool WordAlphabet::hasUniqueAlphabet() const { string s = vAbsAlph_[0]->getAlphabetType(); for (unsigned int i = 1; i < vAbsAlph_.size(); i++) { if (vAbsAlph_[i]->getAlphabetType() != s) return false; } return true; } bool WordAlphabet::containsUnresolved(const std::string& state) const throw (BadCharException) { size_t s = vAbsAlph_.size(); if (state.length() != s) throw BadCharException(state, "WordAlphabet::containsUnresolved", this); for (size_t i = 0; i < vAbsAlph_.size(); i++) { if (vAbsAlph_[i]->isUnresolved(state.substr(i, 1))) { return true; } } return false; } /******************************************************************************/ bool WordAlphabet::containsGap(const std::string& state) const throw (BadCharException) { size_t s = vAbsAlph_.size(); if (state.length() != s) throw BadCharException(state, "WordAlphabet::containsGap", this); for (size_t i = 0; i < vAbsAlph_.size(); i++) { if (vAbsAlph_[i]->isGap(state.substr(i, 1))) return true; } return false; } /******************************************************************************/ std::string WordAlphabet::getName(const std::string& state) const throw (BadCharException) { if (state.size() != vAbsAlph_.size()) throw BadCharException(state, "WordAlphabet::getName", this); if (containsUnresolved(state)) return getStateAt(getSize() + 1).getName(); if (containsGap(state)) return getStateAt(0).getName(); else return AbstractAlphabet::getName(state); } /******************************************************************************/ std::vector WordAlphabet::getAlias(int state) const throw (BadIntException) { if 
(!isIntInAlphabet(state)) throw BadIntException(state, "WordAlphabet::getAlias(int): Specified base unknown."); vector v; int i, s = getSize(); if (state == s) { v.resize(s); for (i = 0; i < s; i++) { v[i] = i; } } else { v.resize(1); v[0] = state; } return v; } /******************************************************************************/ std::vector WordAlphabet::getAlias(const std::string& state) const throw (BadCharException) { string locstate = TextTools::toUpper(state); if (!isCharInAlphabet(locstate)) throw BadCharException(locstate, "WordAlphabet::getAlias(string): Specified base unknown."); vector v; unsigned int i, s = getSize(); string st = ""; for (i = 0; i < vAbsAlph_.size(); i++) { st += "N"; } if (locstate == st) { v.resize(s); for (i = 0; i < s; i++) { v[i] = intToChar(i); } } else { v.resize(1); v[0] = state; } return v; } /******************************************************************************/ int WordAlphabet::getGeneric(const std::vector& states) const throw (BadIntException) { return states[0]; } /******************************************************************************/ std::string WordAlphabet::getGeneric(const std::vector& states) const throw (BadCharException) { return states[0]; } /******************************************************************************/ int WordAlphabet::getWord(const std::vector& vint, size_t pos) const throw (IndexOutOfBoundsException) { if (vint.size() < pos + vAbsAlph_.size()) throw IndexOutOfBoundsException("WordAlphabet::getWord", pos, 0, vint.size() - vAbsAlph_.size()); vector vs; for (size_t i = 0; i < vAbsAlph_.size(); i++) { vs.push_back(vAbsAlph_[i]->intToChar(vint[i + pos])); } return charToInt(getWord(vs)); // This can't throw a BadCharException! 
} /****************************************************************************************/ std::string WordAlphabet::getWord(const std::vector& vpos, size_t pos) const throw (IndexOutOfBoundsException, BadCharException) { if (vpos.size() < pos + vAbsAlph_.size()) throw IndexOutOfBoundsException("WordAlphabet::getWord", pos, 0, vpos.size() - vAbsAlph_.size()); string s = ""; for (size_t i = 0; i < vAbsAlph_.size(); i++) { s += vpos[pos + i]; } // test charToInt(s); return s; } /****************************************************************************************/ Sequence* WordAlphabet::translate(const Sequence& sequence, size_t pos) const throw (AlphabetMismatchException, Exception) { if ((!hasUniqueAlphabet()) or (sequence.getAlphabet()->getAlphabetType() != vAbsAlph_[0]->getAlphabetType())) throw AlphabetMismatchException("No matching alphabets", sequence.getAlphabet(), vAbsAlph_[0]); vector v1 = sequence.getContent(); vector v2; size_t s = sequence.size(); unsigned int l = getLength(); size_t i = pos; while (i + l < s) { v2.push_back(getWord(v1, i)); i += l; } return new BasicSequence(sequence.getName(), v2, this); } /****************************************************************************************/ Sequence* WordAlphabet::reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { if ((!hasUniqueAlphabet()) or (sequence.getAlphabet()->getAlphabetType() != getAlphabetType())) throw AlphabetMismatchException("No matching alphabets"); Sequence* pseq = new BasicSequence(sequence.getName(), "", getNAlphabet(0)); size_t s = sequence.size(); for (size_t i = 0; i < s; i++) { pseq->append(getPositions(sequence[i])); } return pseq; } /****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/ProteicAlphabetState.h000644 000000 000000 00000005340 12147656566 023063 0ustar00rootroot000000 000000 // // File: ProteicAlphabetState.h // Author: Sylvain Gaillard // Created: 
29/07/2009 13:56:01 // /* Copyright or © or Copr. CNRS, (July 29, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _PROTEICALPHABETSTATE_H_ #define _PROTEICALPHABETSTATE_H_ // From the STL #include namespace bpp { /** * @brief This is the base class to describe states in a ProteicAlphabet. 
* * @author Sylvain Gaillard */ class ProteicAlphabetState: public AlphabetState { private: std::string abbr_; public: ProteicAlphabetState(int num, const std::string & letter, const std::string & abbr, const std::string & name): AlphabetState(num, letter, name), abbr_(abbr) {} // Class destructor virtual ~ProteicAlphabetState() {} public: ProteicAlphabetState * clone() const { return new ProteicAlphabetState(* this); } /** * @brief Get the state's abbreviation. * * @return The state's abbreviation. */ const std::string & getAbbreviation() const { return abbr_; } /** * @brief Set the state's abbreviation. * * @param abbr The state's abbreviation. */ void setAbbreviation(const std::string & abbr) { abbr_ = abbr; } }; } #endif // _PROTEICALPHABETSTATE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AlphabetTools.h000644 000000 000000 00000020173 12147656566 021556 0ustar00rootroot000000 000000 // // File: AlphabetTools.h // Created by: Julien Dutheil // Created on: Fri Oct 10 17:27:39 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALPHABETTOOLS_H_ #define _ALPHABETTOOLS_H_ #include "DNA.h" #include "RNA.h" #include "ProteicAlphabet.h" #include "DefaultAlphabet.h" #include "CodonAlphabet.h" #include "RNY.h" #include "BinaryAlphabet.h" #include #include namespace bpp { /** * @brief Utilitary functions dealing with alphabets. */ class AlphabetTools { public: static const DNA DNA_ALPHABET; static const RNA RNA_ALPHABET; static const ProteicAlphabet PROTEIN_ALPHABET; static const DefaultAlphabet DEFAULT_ALPHABET; public: AlphabetTools() {} virtual ~AlphabetTools() {} public: /** * @brief Character identification method for sequence's alphabet identification * * Return : * - -1 gap * - 1 DNA specific (no character!) * - 2 RNA specific (U) * - 3 Protein specific (characters E, F, I, L, P, Q) * - 4 Nucleotide specific (no character) * - 5 DNA or Protein specific (T) * - 6 RNA or Protein specific (no character) * - 7 Any alphabet (A, B, C, D, G, H, J, K, M, N, O, R, S, V, W, X, Y, Z, 0) * - 0 Unknown character * * @param state The character to test. * @return The type code. */ static int getType(char state); /** * @brief This checks that all characters in the alphabet are coded by a string of same length. 
* * This method is used when states are coded by more than one character, typically: codon alphabets. * * @param alphabet The alphabet to check. * @return True if all text description have the same length (e.g. 3 for codon alphabet). */ static bool checkAlphabetCodingSize(const Alphabet& alphabet) throw (AlphabetException); /** * @brief This checks that all characters in the alphabet are coded by a string of same length. * * This function performs the same work as the previous one, but deals with pointers * instead of reference. This may be more convenient since we often use pointers on alphabets. * * @param alphabet a pointer toward the alphabet to check. * @return True if all text description have the same length (e.g. 3 for codon alphabet). */ static bool checkAlphabetCodingSize(const Alphabet* alphabet) throw (AlphabetException); /** * @brief In case that all states in the given alphabet have a string description of same length, * send this length; e.g. 3 for codon alphabets. * * @param alphabet The alphabet to analyse. * @return The common size of all text descriptionif there is one. Else throws an AlphabetException. */ static unsigned int getAlphabetCodingSize(const Alphabet& alphabet) throw (AlphabetException); /** * @brief In case that all states in the given alphabet have a string description of same length, * send this length; e.g. 3 for codon alphabets. * * This function performs the same work as the previous one, but deals with pointers * instead of reference. This may be more convenient since we often use pointers on alphabets. * * @param alphabet a pointer toward the alphabet to analyse. * @return The common size of all text descriptionif there is one. Else throws an AlphabetException. */ static unsigned int getAlphabetCodingSize(const Alphabet* alphabet) throw (AlphabetException); /** * @return True if the alphabet is an instanciation of the NucleicAlphabet class. * @param alphabet The alphabet to check. 
*/ static bool isNucleicAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the DNA class. * @param alphabet The alphabet to check. */ static bool isDNAAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the RNA class. * @param alphabet The alphabet to check. */ static bool isRNAAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the ProteicAlphabet class. * @param alphabet The alphabet to check. */ static bool isProteicAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the Codon class. * @param alphabet The alphabet to check. */ static bool isCodonAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the WordAlphabet class. * @param alphabet The alphabet to check. */ static bool isWordAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the RNY class. * @param alphabet The alphabet to check. */ static bool isRNYAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the BinaryAlphabet class. * @param alphabet The alphabet to check. */ static bool isBinaryAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @return True if the alphabet is an instanciation of the DefaultAlphabet class. * @param alphabet The alphabet to check. */ static bool isDefaultAlphabet(const Alphabet* alphabet) { return alphabetInheritsFrom(alphabet); } /** * @brief Tell if two characters match according to a given alphabet. 
* * Example (DNA): * @verbatim * A,T: false * A,A: true * A,N: true * A,Y: false * N,Y: true * N,N: true * @endverbatim * * @return True if the two characters are identical, or are compatible if at least one of them is a generic character. * @param alphabet The alphabet to use. * @param i First character to check. * @param j Secondt character to check. */ static bool match(const Alphabet* alphabet, int i, int j) { std::vector a = alphabet->getAlias(i); std::vector b = alphabet->getAlias(j); std::vector u = VectorTools::vectorIntersection(a, b); return u.size() > 0; } private: template static bool alphabetInheritsFrom(const Alphabet* alphabet) { try { const Y* t = dynamic_cast(alphabet); return t != 0; // Solves strange behavior in new gcc? } catch (std::exception& e) { return false; } } }; } // end of namespace bpp. #endif // _ALPHABETTOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/DefaultAlphabet.h000644 000000 000000 00000005217 12147656566 022044 0ustar00rootroot000000 000000 // // File: DefaultAlphabet.h // Author: Julien Dutheil // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _DEFAULTALPHABET_H_ #define _DEFAULTALPHABET_H_ #include "LetterAlphabet.h" #include namespace bpp { /** * @brief The DefaultAlphabet class. * * This alphabet should match virtually any type of sequences. * This should be used by who does not care of the sequence type. */ class DefaultAlphabet: public LetterAlphabet { protected: void registerState(const AlphabetState& st) { AbstractAlphabet::registerState(* (st.clone())); } const std::string chars_; public: // class constructor DefaultAlphabet(); // class destructor virtual ~DefaultAlphabet() {} public: unsigned int getSize() const { return 26; } unsigned int getNumberOfTypes() const { return 27; } std::string getAlphabetType() const { return "Default alphabet"; } int getUnknownCharacterCode() const { return 37; } bool isUnresolved(int state) const { return state == 37; } bool isUnresolved(const std::string& state) const { return false; } }; } //end of namespace bpp. 
#endif // _DEFAULTALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/WordAlphabet.h000644 000000 000000 00000024507 12147656566 021376 0ustar00rootroot000000 000000 // // File: WordAlphabet.h // Created by: Laurent Gueguen // Created on: Sun Dec 28 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _WORDALPHABET_H_ #define _WORDALPHABET_H_ #include "AbstractAlphabet.h" // From the STL: #include #include #include "../Sequence.h" namespace bpp { /** * @brief The base class for word alphabets. * * These alphabets are compounds of several alphabets. The only * constraint on these alphabets is that their words have length one * (so it is not possible to make WordAlphabets from other * WordAlphabets). The construction is made from a vector of pointers * to AbstractAlphabets. * * The strings of the WordAlphabet are concatenations of the strings * of the Alphabets. They are made from the resolved letters of the * Alphabets. */ class WordAlphabet : public AbstractAlphabet { protected: std::vector vAbsAlph_; public: // Constructor and destructor. /** * @brief Builds a new word alphabet from a vector of Alphabets. * * The unit alphabets are not owned by the world alphabet, and won't * be destroyed when this instance is destroyed. * * @param vAlpha The vector of Alphabets to be used. */ WordAlphabet(const std::vector& vAlpha); /** * @brief Builds a new word alphabet from a pointer to number of * Alphabets. * * @param pAlpha The Pointer to the Alphabet to be used. * @param num the length of the words. */ WordAlphabet(const Alphabet* pAlpha, unsigned int num); virtual ~WordAlphabet() {} public: /** * @name Methods redefined from Alphabet * * @{ */ /** * @brief Get the complete name of a state given its string description. * * In case of undefined characters (i.e. N and X for nucleic alphabets), * this method will return the name of the undefined word. * * @param state The string description of the given state. * @return The name of the state. * @throw BadCharException When state is not a valid char description. 
*/ std::string getName(const std::string& state) const throw (BadCharException); int charToInt(const std::string& state) const throw (BadCharException) { if (state.size() != vAbsAlph_.size()) throw BadCharException(state, "WordAlphabet::charToInt", this); if (containsUnresolved(state)) return getSize(); if (containsGap(state)) return -1; else return AbstractAlphabet::charToInt(state); } unsigned int getSize() const { return getNumberOfChars() - 2; } /** @} */ /** * @brief Returns True if the Alphabet of the letters in the word * are the same type. * */ bool hasUniqueAlphabet() const; /** * @brief Returns the length of the word * */ unsigned int getLength() const { return static_cast(vAbsAlph_.size()); } /** * @brief Returns the number of resolved states + one for unresolved * */ unsigned int getNumberOfTypes() const { return getNumberOfChars() - 1; } std::string getAlphabetType() const; int getUnknownCharacterCode() const { return getSize(); } bool isUnresolved(int state) const { return state == getUnknownCharacterCode(); } bool isUnresolved(const std::string& state) const { return charToInt(state) == getUnknownCharacterCode(); } std::vector getAlias(int state) const throw (BadIntException); std::vector getAlias(const std::string& state) const throw (BadCharException); int getGeneric(const std::vector& states) const throw (BadIntException); std::string getGeneric(const std::vector& states) const throw (BadCharException); private: /** * @name Inner utilitary functions * * @{ */ bool containsUnresolved(const std::string& state) const throw (BadCharException); bool containsGap(const std::string& state) const throw (BadCharException); void build_(); /** @} */ public: /** * @name Word specific methods * * @{ */ /** * @brief Get the pointer to the Alphabet of the n-position. * * @param n The position in the word (starting at 0). * @return The pointer to the Alphabet of the n-position. 
*/ const Alphabet* getNAlphabet(size_t n) const { if (n >= vAbsAlph_.size()) throw IndexOutOfBoundsException("WordAlphabet::getNPosition", n, 0, vAbsAlph_.size()); return vAbsAlph_[n]; } /** * @brief Get the int code for a word given the int code of the underlying positions. * * The int code of each position must match the corresponding alphabet specified at this position. * @param vint description for all the positions. * @param pos the start position to match in the vector. * @return The int code of the word. * @throw IndexOutOfBoundsException In case of wrong position. */ virtual int getWord(const std::vector& vint, size_t pos = 0) const throw (IndexOutOfBoundsException); /** * @brief Get the char code for a word given the char code of the * underlying positions. * * The char code of each position must match the corresponding alphabet specified at this position. * @param vpos vector description for all the positions. * @param pos the start position to match in the vector. * @return The string of the word. * @throw IndexOutOfBoundsException In case of wrong position. */ virtual std::string getWord(const std::vector& vpos, size_t pos = 0) const throw (IndexOutOfBoundsException, BadCharException); /** * @brief Get the int code of the n-position of a word given its int description. * * @param word The int description of the word. * @param n The position in the word (starting at 0). * @return The int description of the n-position of the word. */ int getNPosition(int word, size_t n) const throw (BadIntException) { if (n >= vAbsAlph_.size()) throw IndexOutOfBoundsException("WordAlphabet::getNPosition", n, 0, vAbsAlph_.size()); std::string s = intToChar(word); return vAbsAlph_[n]->charToInt(s.substr(n, 1)); } /** * @brief Get the int codes of each position of a word given its int description. * * @param word The int description of the word. * @return The int description of the positions of the codon. 
*/ std::vector getPositions(int word) const throw (BadIntException) { std::string s = intToChar(word); std::vector positions; for (size_t i = 0; i < s.size(); i++) { positions.push_back(vAbsAlph_[i]->charToInt(s.substr(i, 1))); } return positions; } /** * @brief Get the char code of the n-position of a word given its char description. * * @param word The char description of the word. * @param n The position in the word (starting at 0). * @return The char description of the n-position of the word. */ std::string getNPosition (const std::string& word, size_t n) const throw (BadCharException) { if (n > vAbsAlph_.size()) throw BadCharException("", "WordAlphabet::getNPosition", this); // Test: charToInt(word); return "" + word.substr(n, 1); } /** * @brief Get the char codes of each position of a word given its char description. * * @param word The char description of the word. * @return The char description of the three positions of the word. */ std::vector getPositions(const std::string& word) const throw (BadCharException) { charToInt(word); std::vector positions; for (size_t i = 0; i < word.size(); i++) { positions.push_back(word.substr(i, 1)); } return positions; } /** * @brief Translate a whole sequence from letters alphabet to words alphabet. * * @param sequence A sequence in letters alphabet. * @param pos the start postion (default 0) * @return The corresponding sequence in words alphabet. * @throw AlphabetMismatchException If the sequence alphabet do not match the source alphabet. * @throw Exception Other kind of error, depending on the implementation. */ Sequence* translate(const Sequence &sequence, size_t = 0) const throw (AlphabetMismatchException, Exception); /** * @brief Translate a whole sequence from words alphabet to letters alphabet. * * @param sequence A sequence in words alphabet. * @return The corresponding sequence in letters alphabet. * @throw AlphabetMismatchException If the sequence alphabet do not match the target alphabet. 
* @throw Exception Other kind of error, depending on the implementation. */ Sequence* reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception); /** @} */ /** * @name Overloaded AbstractAlphabet methods. * @{ */ unsigned int getStateCodingSize() const { return static_cast(vAbsAlph_.size()); } /** @} */ }; } // end of namespace bpp. #endif // _WORDALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h000644 000000 000000 00000010472 12147656566 022457 0ustar00rootroot000000 000000 // // File: CaseMaskedAlphabet.h // Created by: Julien Dutheil // Created on: Sun Sep 05 2010 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _CASEMASKEDALPHABET_H_ #define _CASEMASKEDALPHABET_H_ #include "LetterAlphabet.h" //From the STL: #include namespace bpp { /** * @brief Case-sensitive letter alphabet. * * This alphabet is used for parsing comodity. * It takes as input another LetterAlphabet and will create duplicate any aphanumerical and upper case state, * by creating a lower case version of the state, also named "masked" state. * Helper functions are provided to determine whether a given state is masked or unmasked. */ class CaseMaskedAlphabet : public LetterAlphabet { public: const LetterAlphabet* nocaseAlphabet_; public: CaseMaskedAlphabet(const LetterAlphabet* nocaseAlphabet); CaseMaskedAlphabet(const CaseMaskedAlphabet& cma) : LetterAlphabet(cma), nocaseAlphabet_(cma.nocaseAlphabet_) {} CaseMaskedAlphabet& operator=(const CaseMaskedAlphabet& cma) { LetterAlphabet::operator=(cma); nocaseAlphabet_ = cma.nocaseAlphabet_; return *this; } public: unsigned int getSize() const { return nocaseAlphabet_->getSize(); } unsigned int getNumberOfTypes() const { return nocaseAlphabet_->getNumberOfTypes(); } std::string getAlphabetType() const { return "Default alphabet"; } int getUnknownCharacterCode() const { return nocaseAlphabet_->getUnknownCharacterCode(); } bool isUnresolved(int state) const { return nocaseAlphabet_->isUnresolved(state); } bool isUnresolved(const std::string& state) const { return nocaseAlphabet_->isUnresolved(state); } const Alphabet* getUnmaskedAlphabet() const { return nocaseAlphabet_; } bool isMasked(int state) const { return state >= 100; } bool isMasked(const std::string& 
state) const { char c = state.c_str()[0]; return isMasked(c); } bool isMasked(char state) const { return isalpha(state) && !isupper(state); } /** * @brief Get the masked state equivalent to the input one. * * If the input state is masked, returns it "as is". * @param state The input state. * @throw BadIntException if the input state is not supported, or if there is no quivallent masked state. */ int getMaskedEquivalentState(int state) const throw (BadIntException); /** * @brief Get the masked state equivalent to the input one. * * If the input state is masked, returns it "as is". * @param state The input state. * @throw BadCharException if the input state is not supported. * @throw BadIntException if there is no equivalent masked state. */ const std::string getMaskedEquivalentState(const std::string& state) const throw (BadCharException, BadIntException); }; } // end of namespace bpp #endif //_CASEMASKEDALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h000644 000000 000000 00000004720 12147656566 026610 0ustar00rootroot000000 000000 // // File: InvertebrateMitochondrialCodonAlphabet.h // Created by: Eric Bazin // Created on: thu mar 1 14:25:09 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_ #define _INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_ #include "CodonAlphabet.h" namespace bpp { /** * @brief This class implements the Invertebrate * @author Laurent Guéguen * * Mitochondrial codon alphabet as describe on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG5 */ class InvertebrateMitochondrialCodonAlphabet: public CodonAlphabet { public: InvertebrateMitochondrialCodonAlphabet(const NucleicAlphabet * alpha); virtual ~InvertebrateMitochondrialCodonAlphabet() {}; public: std::string getAlphabetType() const { return "Codon alphabet: InvertebrateMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")"; } }; } //end of namespace bpp. #endif //_INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp000644 000000 000000 00000004736 12147656566 026575 0ustar00rootroot000000 000000 // // File: EchinodermMitochondrialCodonAlphabet.h // Authors: Eric Bazin // Sylvain Gaillard // Created on: 14 11:31:27 CET 2005 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "EchinodermMitochondrialCodonAlphabet.h" using namespace bpp; using namespace std; EchinodermMitochondrialCodonAlphabet::EchinodermMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) : CodonAlphabet(alpha) { string A = alpha->intToChar(0); string G = alpha->intToChar(2); string T = alpha->intToChar(3); vector vstop; vstop.push_back(T + A + A); vstop.push_back(T + A + G); int istop; unsigned int j; for (unsigned int i = 0 ; i < vstop.size() ; i++) { istop = charToInt(vstop[i]); stopCodons_.push_back(istop); j = 0; while (j < getNumberOfChars()) { if (getStateAt(j).getNum() == istop) { getStateAt(j).setName(STOP); break; } j++; } } initCodon_ = charToInt(A + T + G); } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp000644 000000 000000 00000006527 12147656566 023020 0ustar00rootroot000000 000000 // // File: CaseMaskedAlphabet.cpp // Created by: Julien Dutheil // Created on: Sun Sep 05 2010 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "CaseMaskedAlphabet.h" #include using namespace bpp; //From the STL: #include #include using namespace std; CaseMaskedAlphabet::CaseMaskedAlphabet(const LetterAlphabet* nocaseAlphabet) : LetterAlphabet(true), nocaseAlphabet_(nocaseAlphabet) { vector chars = nocaseAlphabet_->getSupportedChars(); for (unsigned int i = 0; i < chars.size(); ++i) { AlphabetState state = nocaseAlphabet_->getState(chars[i]); registerState(state); char c = *chars[i].c_str(); if (isalpha(c)) { if (isupper(c)) { registerState(AlphabetState(state.getNum() + 100, TextTools::toLower(state.getLetter()), string("Masked ") + state.getName())); } } } } int CaseMaskedAlphabet::getMaskedEquivalentState(int state) const throw (BadIntException) { if (!isIntInAlphabet(state)) throw BadIntException(state, "CaseMaskedAlphabet::getMaskedEquivalentState. Unsupported state code."); if (state >= 100) return state; else { state += 100; if (!isIntInAlphabet(state)) throw BadIntException(state, "CaseMaskedAlphabet::getMaskedEquivalentState. 
State has masked equivalent."); return state; } } const string CaseMaskedAlphabet::getMaskedEquivalentState(const string& state) const throw (BadCharException, BadIntException) { if (!isCharInAlphabet(state)) throw BadCharException(state, "CaseMaskedAlphabet::getMaskedEquivalentState. Unsupported state code."); int code = charToInt(state); if (code >= 100) return state; else { code += 100; if (!isIntInAlphabet(code)) throw BadIntException(code, "CaseMaskedAlphabet::getMaskedEquivalentState. State has masked equivalent."); return intToChar(code); } } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/NucleicAlphabetState.h000644 000000 000000 00000010125 12147656566 023035 0ustar00rootroot000000 000000 // // File: NucleicAlphabetState.h // Author: Sylvain Gaillard // Created: 31/07/2009 // /* Copyright or © or Copr. Bio++ Development Team, (July 29, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _NUCLEICALPHABETSTATE_H_ #define _NUCLEICALPHABETSTATE_H_ // From the STL #include #include namespace bpp { /** * @brief This is the base class to describe states in a NucleicAlphabet. * * This class store a binary code for each state. * This binary code is of length 4, one bit per nucleotid. * For DNA alphabet, this code looks like this: * * * * * * * * * * * * * *
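* <table>
* <tr><th>Letter</th><th>Binary code</th><th>int value</th></tr>
* <tr><td>-</td><td>0000</td><td>0</td></tr>
* <tr><td>A</td><td>0001</td><td>1</td></tr>
* <tr><td>C</td><td>0010</td><td>2</td></tr>
* <tr><td>G</td><td>0100</td><td>4</td></tr>
* <tr><td>T</td><td>1000</td><td>8</td></tr>
* <tr><td>N</td><td>1111</td><td>15</td></tr>
* <tr><td>...</td><td></td><td></td></tr>
* <tr><td>M</td><td>0011</td><td>3</td></tr>
* <tr><td>W</td><td>1001</td><td>9</td></tr>
* <tr><td>...</td><td></td><td></td></tr>
* <tr><td>V</td><td>0111</td><td>7</td></tr>
* </table>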
* This notation allows the use of bitwize operations like: * - build a generic state from to states * @code * A | G = R <=> 0001 | 0100 = 0101 * @endcode * - extract a state from an unresolved one by subtraction of an other state * @code * S & ~ C = G <=> 0110 & ~ 0010 = 0100 * @endcode * * The binary code is stored as a char because it's the smallest memory word * that can be allocated. A char is 8 bits long allowing the use of this * class with Alphabet of at least 8 resolved states (enough for known * nucleic alphabets!). * * @author Sylvain Gaillard */ class NucleicAlphabetState: public AlphabetState { private: int binCode_; public: NucleicAlphabetState(int num, const std::string& letter, unsigned char code, const std::string& name): AlphabetState(num, letter, name), binCode_(code) {} // Class destructor virtual ~NucleicAlphabetState() {} public: NucleicAlphabetState* clone() const { return new NucleicAlphabetState(* this); } /** * @brief Get the state's binary representation. * * @return The state's binary representation. */ int getBinaryCode() const { return binCode_; } /** * @brief Set the state's binary representation. * * @param code The state's binary representation. */ void setBinaryCode(int code) { binCode_ = code; } }; } #endif // _NUCLEICALPHABETSTATE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/RNA.cpp000644 000000 000000 00000013461 12147656566 017772 0ustar00rootroot000000 000000 // // File: RNA.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "RNA.h" #include #include using namespace bpp; // From STL: #include using namespace std; /******************************************************************************/ // class constructor RNA::RNA(bool exclamationMarkCountsAsGap) { // Alphabet size definition resize(21); // Alphabet content definition // all unresolved bases use n°14 setState( 0, NucleicAlphabetState(-1, "-", 0, "Gap")); setState( 1, NucleicAlphabetState( 0, "A", 1, "Adenine")); setState( 2, NucleicAlphabetState( 1, "C", 2, "Cytosine")); setState( 3, NucleicAlphabetState( 2, "G", 4, "Guanine")); setState( 4, NucleicAlphabetState( 3, "U", 8, "Uracile")); setState( 5, NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine")); setState( 6, NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)")); setState( 7, NucleicAlphabetState( 6, "W", 9, "Adenine or Uracile")); setState( 8, NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine")); setState( 9, NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Uracile)")); setState(10, NucleicAlphabetState( 9, "K", 12, "Guanine or Uracile")); setState(11, NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine")); setState(12, NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Uracile")); setState(13, NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Uracile")); setState(14, NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Uracile")); setState(15, NucleicAlphabetState(14, "N", 15, "Unresolved base")); setState(16, NucleicAlphabetState(14, "X", 15, "Unresolved base")); setState(17, NucleicAlphabetState(14, "O", 15, "Unresolved base")); setState(18, NucleicAlphabetState(14, "0", 15, "Unresolved base")); setState(19, NucleicAlphabetState(14, "?", 15, "Unresolved base")); if (exclamationMarkCountsAsGap) setState(20, NucleicAlphabetState(-1, "!", 0, "Unresolved base")); else setState(20, NucleicAlphabetState(14, "!", 15, "Unresolved base")); } 
/******************************************************************************/ std::vector RNA::getAlias(int state) const throw (BadIntException) { if (!isIntInAlphabet(state)) throw BadIntException(state, "DNA::getAlias(int): Specified base unknown."); vector v; const NucleicAlphabetState& st = getState(state); if (state == -1) v.push_back(-1); if (st.getBinaryCode() & 1) v.push_back(0); if (st.getBinaryCode() & 2) v.push_back(1); if (st.getBinaryCode() & 4) v.push_back(2); if (st.getBinaryCode() & 8) v.push_back(3); return v; } /******************************************************************************/ std::vector RNA::getAlias(const std::string & state) const throw (BadCharException) { string locstate = TextTools::toUpper(state); if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "RNA::getAlias(int): Specified base unknown."); vector vi = this->getAlias(this->charToInt(state)); vector v; for (unsigned int i = 0 ; i < vi.size() ; i++) v.push_back(this->intToChar(vi[i])); return v; } /******************************************************************************/ int RNA::getGeneric(const std::vector & states) const throw (BadIntException) { int v = 0; for (size_t i = 0 ; i < states.size() ; ++i) { if (!isIntInAlphabet(states[i])) throw BadIntException(states[i], "RNA::getGeneric(const vector& states): Specified base unknown."); v |= getState(states[i]).getBinaryCode(); } return getStateByBinCode(v).getNum(); } /******************************************************************************/ std::string RNA::getGeneric(const std::vector & states) const throw (BadCharException) { vector vi; for (unsigned int i = 0 ; i < states.size() ; ++i) { if (!isCharInAlphabet(states[i])) throw BadCharException(states[i], "DNA::getGeneric(const vector& states): Specified base unknown."); vi.push_back(this->charToInt(states[i])); } return intToChar(getGeneric(vi)); } /******************************************************************************/ 
bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/RNY.h000644 000000 000000 00000007561 12147656566 017473 0ustar00rootroot000000 000000 // // File: RNY.h // Created by: Laurent Gueguen // Created on: Tue Jul 31 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _RNY_H_ #define _RNY_H_ #include "AbstractAlphabet.h" #include "NucleicAlphabet.h" #include /** * @brief This alphabet is used to deal quotiented triplet RNY + * combinations with "-". 
* * More explicitly, a quotiented triplet RNY is defined on the alphabet * * @f\{C,T,R\} x \{A,C,G,T\} x \{ A,G,Y\}@f. * * Triplet "---" is the only triplet defined as gap. Other triplets * with "-" are defined as unresolved. * * @author Laurent Guéguen */ namespace bpp { class RNY : public AbstractAlphabet { private: const NucleicAlphabet& nuclalph_; public: RNY(const NucleicAlphabet&); ~RNY() {} public: int charToInt(const std::string& state) const throw (BadCharException); std::string intToChar(int state) const throw (BadIntException); bool containsGap(const std::string& state) const throw (BadCharException); unsigned int getSize() const { return 36; } unsigned int getNumberOfTypes() const { return 80; } int getUnknownCharacterCode() const { return 350; } bool isGap(int state) const; std::vector getAlias( int state) const throw (BadIntException); std::vector getAlias(const std::string& state) const throw (BadCharException); bool isUnresolved(int state) const; bool isUnresolved(const std::string& state) const; std::string getAlphabetType() const { return "RNY alphabet"; } const NucleicAlphabet& getLetterAlphabet() const; public: /** * @brief Get the char code for a triplet given the char code of the three underlying positions. * * The char code of each position must match the nucleic alphabet specified for this alphabet. * NB: This performs pos1 + pos2 + pos3 after checking for each position validity. * @param pos1 Char description for position 1. * @param pos2 Char description for position 2. * @param pos3 Char description for position 3. * @return The Char code of the triplet. 
*/ std::string getRNY(const std::string&, const std::string&, const std::string&) const throw (BadCharException); int getRNY(int, int, int, const Alphabet&) const throw (BadCharException); }; } #endif // _RNY_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/NucleicAlphabet.h000644 000000 000000 00000017737 12147656566 022054 0ustar00rootroot000000 000000 // // File: NucleicAlphabet.h // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _NUCLEICALPHABET_H_ #define _NUCLEICALPHABET_H_ #include "LetterAlphabet.h" #include "NucleicAlphabetState.h" #include #include namespace bpp { /** * @brief The abstract base class for nucleic alphabets. * * This class only implements a few methods, it is mainly designed for methods/classes * that will require to work with both RNA and DNA. */ class NucleicAlphabet : public LetterAlphabet { private: std::map binCodes_; void updateMaps_(int pos, const NucleicAlphabetState& st) { if (binCodes_.find(st.getBinaryCode()) == binCodes_.end()) binCodes_[st.getBinaryCode()] = pos; } public: NucleicAlphabet(): binCodes_() {} virtual ~NucleicAlphabet() {} protected: /** * @name Overloaded methods from AbstractAlphabet * @{ */ void registerState(const NucleicAlphabetState& st) { LetterAlphabet::registerState(st); updateMaps_(getNumberOfChars(), st); } void setState(unsigned int pos, const NucleicAlphabetState& st) { LetterAlphabet::setState(pos, st); updateMaps_(pos, st); } const NucleicAlphabetState& getStateAt(unsigned int pos) const throw (IndexOutOfBoundsException) { return dynamic_cast( AbstractAlphabet::getStateAt(pos) ); } NucleicAlphabetState& getStateAt(unsigned int pos) throw (IndexOutOfBoundsException) { return dynamic_cast( AbstractAlphabet::getStateAt(pos) ); } /** @} */ public: /** * @name Overloaded methods from AbstractAlphabet * @{ */ const NucleicAlphabetState& getState(const std::string& letter) const throw (BadCharException) { return dynamic_cast( AbstractAlphabet::getState(letter) ); } const NucleicAlphabetState& getState(int num) const throw (BadIntException) { return dynamic_cast( AbstractAlphabet::getState(num) ); } /** @} */ /** * @name Specific methods * @{ */ /** * @brief Get a state by its binary representation. * * @param code The binary representation as an unsigned char. * @return The NucleicAlphabetState. 
* @throw BadIntException If the code is not a valide state. * @author Sylvain Gaillard */ const NucleicAlphabetState& getStateByBinCode(int code) const throw (BadIntException) { std::map::const_iterator it = binCodes_.find(code); if (it == binCodes_.end()) throw BadIntException(code, "NucleicAlphabet::getState(unsigned char): Binary code not in alphabet", this); return getStateAt(it->second); } /** * @brief Subtract states * * Get the remaining state when subtracting one state to another. * * @code * int a = alpha->charToInt("A"); * int m = alpha->charToInt("M"); * int c = alpha->subtract(m, a); * * cout << alpha->intToChar(c) << endl; * * // should print C because M - A = C * @endcode * * @param s1 the first state as an int * @param s2 the second state as an int * @throw BadIntException if one of the states is not valide. * @return The remaining state as an int * @author Sylvain Gaillard */ int subtract(int s1, int s2) const throw (BadIntException) { return getStateByBinCode(getState(s1).getBinaryCode() & ~ getState(s2).getBinaryCode()).getNum(); } /** * @brief Subtract states * * Get the remaining state when subtracting one state to another. * * @code * string a = "A"; * string m = "M"; * * cout << alpha->subtract(m, a) << endl; * * // should print C because M - A = C * @endcode * * @param s1 the first state as a string * @param s2 the second state as a string * @throw BadCharException if one of the states is not valide. * @return The remaining state as a string * @author Sylvain Gaillard */ std::string subtract(const std::string& s1, const std::string& s2) const throw (BadCharException) { return intToChar(subtract(charToInt(s1), charToInt(s2))); } /** * @brief Get the overlap between to states * * Get the overlapping states between two steps. 
* * @code * int m = alpha->charToInt("M"); * int r = alpha->charToInt("R"); * int a = alpha->getOverlap(m, r); * * cout << alpha->intToChar(a) << endl; * * // should print A because M = A/C and R = A/G * @endcode * * @param s1 the first state as an int * @param s2 the second state as an int * @throw BadIntException if one of the states is not valid * @return The overlapping state * @author Sylvain Gaillard */ int getOverlap(int s1, int s2) const throw (BadIntException) { return getStateByBinCode(getState(s1).getBinaryCode() & getState(s2).getBinaryCode()).getNum(); } /** * @brief Get the overlap between to states * * Get the overlapping states between two steps. * * @code * string m = "M"; * string r = R; * * cout << alpha->getOverlap(m, r) << endl; * * // should print A because M = A/C and R = A/G * @endcode * * @param s1 the first state as a string * @param s2 the second state as a string * @throw BadCharException if one of the states is not valid * @return The overlapping state * @author Sylvain Gaillard */ std::string getOverlap(const std::string& s1, const std::string& s2) const throw (BadCharException) { return intToChar(getOverlap(charToInt(s1), charToInt(s2))); } /** @} */ public: // return 4 : A, C, G, T (or U) unsigned int getSize() const { return 4; } // return 15 : gap isn't included, generic unresolved bases (N, X, ?, O, 0) count for one unsigned int getNumberOfTypes() const { return 15; } int getUnknownCharacterCode() const { return 14; } bool isUnresolved(int state) const { return state > 3; } bool isUnresolved(const std::string& state) const { return charToInt(state) > 3; } }; } //end of namespace bpp. #endif // _NUCLEICALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h000644 000000 000000 00000004702 12147656566 026261 0ustar00rootroot000000 000000 // // File: VertebrateMitochondrialCodonAlphabet.h // Created by: Eric Bazin // Created on: wen mar 2 16:01:59 CET 2005 /// /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _VERTEBRATEMITOCHONDRIALCODONALPHABET_H_ #define _VERTEBRATEMITOCHONDRIALCODONALPHABET_H_ #include "CodonAlphabet.h" namespace bpp { /** * @brief This class implements the vertebrate mitochondrial codon alphabet as describe on the NCBI * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG2 * @author Laurent Guéguen */ class VertebrateMitochondrialCodonAlphabet : public CodonAlphabet { public: VertebrateMitochondrialCodonAlphabet(const NucleicAlphabet * alpha); virtual ~VertebrateMitochondrialCodonAlphabet() {}; public: std::string getAlphabetType() const { return "Codon alphabet: VertebrateMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")"; } }; } //end of namespace bpp. #endif //_VERTEBRATEMITOCHONDRIALCODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/CodonAlphabet.cpp000644 000000 000000 00000007361 12147656566 022057 0ustar00rootroot000000 000000 // // File: CodonAlphabet.h // Created by: Julien Dutheil // Created on: Sun Oct 12 17:41:56 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "CodonAlphabet.h" #include using namespace bpp; // From the STL: #include using namespace std; const string CodonAlphabet::STOP = "Stop"; /******************************************************************************/ unsigned int CodonAlphabet::numberOfStopCodons() const { return static_cast(stopCodons_.size()); } int CodonAlphabet::getCodon(int pos1, int pos2, int pos3) const throw (BadIntException) { vector vint; vint.push_back(pos1); vint.push_back(pos2); vint.push_back(pos3); return getWord(vint); } /******************************************************************************/ string CodonAlphabet::getCodon(const string & pos1, const string & pos2, const string & pos3) const throw (BadCharException) { vector vint; vint.push_back(pos1); vint.push_back(pos2); vint.push_back(pos3); return getWord(vint); } /******************************************************************************/ int CodonAlphabet::getFirstPosition (int codon) const throw (BadIntException) { return getNPosition(codon,0); } /******************************************************************************/ int CodonAlphabet::getSecondPosition(int codon) const throw (BadIntException) { return 
getNPosition(codon,1); } /******************************************************************************/ int CodonAlphabet::getThirdPosition (int codon) const throw (BadIntException) { return getNPosition(codon,2); } /******************************************************************************/ string CodonAlphabet::getFirstPosition (const string & codon) const throw (BadCharException) { return getNPosition(codon,0); } /******************************************************************************/ string CodonAlphabet::getSecondPosition(const string & codon) const throw (BadCharException) { return getNPosition(codon,1); } /******************************************************************************/ string CodonAlphabet::getThirdPosition (const string & codon) const throw (BadCharException) { return getNPosition(codon,2); } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.h000644 000000 000000 00000004436 12147656566 023205 0ustar00rootroot000000 000000 // // File: StandardCodonAlphabet.h // Created by: Julien Dutheil // Created on: Sun Oct 12 17:51:36 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _STANDARDCODONALPHABET_H_ #define _STANDARDCODONALPHABET_H_ #include "CodonAlphabet.h" namespace bpp { /** * @brief This class implements the standard codon alphabet as describe on the NCBI * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG1 */ class StandardCodonAlphabet : public CodonAlphabet { public: StandardCodonAlphabet(const NucleicAlphabet * alpha); virtual ~StandardCodonAlphabet() {} public: std::string getAlphabetType() const { return "Codon alphabet: Standard("+ vAbsAlph_[0]->getAlphabetType() + ")"; } }; } //end of namespace bpp. #endif //_STANDARDCODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AlphabetExceptions.cpp000644 000000 000000 00000007461 12147656566 023137 0ustar00rootroot000000 000000 // // File: AlphabetExceptions.cpp // Created by: Julien Dutheil // Created on: Mon Nov 3 16:41:53 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "AlphabetExceptions.h" #include "Alphabet.h" #include using namespace bpp; using namespace std; /****************************************************************************** * Alphabet exceptions: * ******************************************************************************/ AlphabetException::AlphabetException(const std::string& text, const Alphabet* alpha) : Exception("AlphabetException: " + text + (alpha ? 
"(" + (alpha->getAlphabetType()) + ")" : string(""))), alphabet_(alpha) {} /******************************************************************************/ BadCharException::BadCharException(const std::string& badChar, const std::string& text, const Alphabet* alpha) : AlphabetException("BadCharException: " + badChar + ". " + text, alpha), c_(badChar) {} string BadCharException::getBadChar() const { return c_; } /******************************************************************************/ BadIntException::BadIntException(int badInt, const std::string& text, const Alphabet* alpha) : AlphabetException("BadIntException: " + TextTools::toString(badInt) + ". " + text, alpha), i_(badInt) {} int BadIntException::getBadInt() const { return i_; } /******************************************************************************/ AlphabetMismatchException::AlphabetMismatchException(const std::string& text, const Alphabet* alpha1, const Alphabet* alpha2) : Exception("AlphabetMismatchException: " + text + (alpha1 != 0 && alpha2 != 0 ? "(" + alpha1->getAlphabetType() + ", " + alpha2->getAlphabetType() + ")" : string(""))), alphabet1_(alpha1), alphabet2_(alpha2) {} vector AlphabetMismatchException::getAlphabets() const { vector v(2); v[0] = alphabet1_; v[1] = alphabet2_; return v; } /******************************************************************************/ CharStateNotSupportedException::CharStateNotSupportedException(const string & text, const Alphabet * alpha) : AlphabetException("CharStateNotSupportedException: " + text, alpha) {}; /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/DNA.cpp000644 000000 000000 00000013466 12147656566 017761 0ustar00rootroot000000 000000 // // File: DNA.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DNA.h" #include "AlphabetState.h" #include #include using namespace bpp; // From STL: #include using namespace std; /******************************************************************************/ DNA::DNA(bool exclamationMarkCountsAsGap) { // Alphabet size definition resize(21); // Alphabet content definition // all unresolved bases use n°14 setState( 0, NucleicAlphabetState(-1, "-", 0, "Gap")); setState( 1, NucleicAlphabetState( 0, "A", 1, "Adenine")); setState( 2, NucleicAlphabetState( 1, "C", 2, "Cytosine")); setState( 3, NucleicAlphabetState( 2, "G", 4, "Guanine")); setState( 4, NucleicAlphabetState( 3, "T", 8, "Thymine")); setState( 5, NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine")); setState( 6, NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)")); setState( 7, NucleicAlphabetState( 6, "W", 9, "Adenine or Thymine")); setState( 8, NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine")); setState( 9, NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Thymine)")); setState(10, NucleicAlphabetState( 9, "K", 12, "Guanine or Thymine")); setState(11, NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine")); setState(12, NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Thymine")); setState(13, NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Thymine")); setState(14, NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Thymine")); setState(15, NucleicAlphabetState(14, "N", 15, "Unresolved base")); setState(16, NucleicAlphabetState(14, "X", 15, "Unresolved base")); setState(17, NucleicAlphabetState(14, "O", 15, "Unresolved base")); setState(18, NucleicAlphabetState(14, "0", 15, "Unresolved base")); setState(19, NucleicAlphabetState(14, "?", 15, "Unresolved base")); if (exclamationMarkCountsAsGap) setState(20, NucleicAlphabetState(-1, "!", 0, "Unresolved base")); else setState(20, NucleicAlphabetState(14, "!", 15, "Unresolved base")); } 
/******************************************************************************/ std::vector DNA::getAlias(int state) const throw (BadIntException) { if (!isIntInAlphabet(state)) throw BadIntException(state, "DNA::getAlias(int): Specified base unknown."); vector v; const NucleicAlphabetState& st = getState(state); if (state == -1) v.push_back(-1); if (st.getBinaryCode() & 1) v.push_back(0); if (st.getBinaryCode() & 2) v.push_back(1); if (st.getBinaryCode() & 4) v.push_back(2); if (st.getBinaryCode() & 8) v.push_back(3); return v; } /******************************************************************************/ std::vector DNA::getAlias(const std::string& state) const throw (BadCharException) { string locstate = TextTools::toUpper(state); if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "DNA::getAlias(int): Specified base unknown."); vector vi = this->getAlias(this->charToInt(state)); vector v; for (unsigned int i = 0 ; i < vi.size() ; i++) v.push_back(this->intToChar(vi[i])); return v; } /******************************************************************************/ int DNA::getGeneric(const std::vector& states) const throw (BadIntException) { int v = 0; for (size_t i = 0 ; i < states.size() ; ++i) { if (!isIntInAlphabet(states[i])) throw BadIntException(states[i], "DNA::getGeneric(const vector& states): Specified base unknown."); v |= getState(states[i]).getBinaryCode(); } return getStateByBinCode(v).getNum(); } /******************************************************************************/ std::string DNA::getGeneric(const std::vector& states) const throw (BadCharException) { vector vi; for (unsigned int i = 0 ; i < states.size() ; ++i) { if (!isCharInAlphabet(states[i])) throw BadCharException(states[i], "DNA::getGeneric(const vector& states): Specified base unknown."); vi.push_back(this->charToInt(states[i])); } return intToChar(getGeneric(vi)); } /******************************************************************************/ 
bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h000644 000000 000000 00000004627 12147656566 025251 0ustar00rootroot000000 000000 // // File: YeastMitochondrialCodonAlphabet.h // Created by: Benoit Nabholz // Created on: Sun Oct 10 14:33 CET 2010 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/ #ifndef _YEASTMITOCHONDRIALCODONALPHABET_H_ #define _YEASTMITOCHONDRIALCODONALPHABET_H_ #include "CodonAlphabet.h" namespace bpp { /** * @brief This class implements the Yeast * @author Laurent Guéguen * * Mitochondrial codon alphabet as describe on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG3 */ class YeastMitochondrialCodonAlphabet: public CodonAlphabet { public: YeastMitochondrialCodonAlphabet(const NucleicAlphabet * alpha); virtual ~YeastMitochondrialCodonAlphabet() {}; public: std::string getAlphabetType() const { return "Codon alphabet: YeastMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")"; } }; } //end of namespace bpp. #endif //_YEASTMITOCHONDRIALCODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp000644 000000 000000 00000005045 12147656566 026615 0ustar00rootroot000000 000000 // // File: VertebrateMitochondrialCodonAlphabet.h // Authors: Eric Bazin // Sylvain Gaillard // Created on: thu mar 1 14:25:09 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "VertebrateMitochondrialCodonAlphabet.h" using namespace bpp; using namespace std; VertebrateMitochondrialCodonAlphabet::VertebrateMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) : CodonAlphabet(alpha) { string A = alpha->intToChar(0); string G = alpha->intToChar(2); string T = alpha->intToChar(3); vector vstop; vstop.push_back(T + A + A); vstop.push_back(T + A + G); vstop.push_back(A + G + G); vstop.push_back(A + G + A); int istop; unsigned int j; for (unsigned int i = 0 ; i < vstop.size() ; i++) { istop = charToInt(vstop[i]); stopCodons_.push_back(istop); j = 0; while (j < getNumberOfChars()) { if (getStateAt(j).getNum() == istop) { getStateAt(j).setName(STOP); break; } j++; } } initCodon_ = charToInt(A + T + G); } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp000644 000000 000000 00000022527 12147656566 022561 0ustar00rootroot000000 000000 // // File: AbstractAlphabet.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AbstractAlphabet.h" #include #include using namespace bpp; // From the STL: #include #include using namespace std; /******************************************************************************/ void AbstractAlphabet::updateMaps_(size_t pos, const AlphabetState& st) { if (letters_.find(st.getLetter()) == letters_.end()) letters_[st.getLetter()] = pos; if (nums_.find(st.getNum()) == nums_.end()) nums_[st.getNum()] = pos; } /******************************************************************************/ void AbstractAlphabet::registerState(const AlphabetState& st) { // Add the state to the vector alphabet_.push_back(st.clone()); // Update the maps updateMaps_(alphabet_.size(), st); } /******************************************************************************/ void AbstractAlphabet::setState(size_t pos, const AlphabetState& st) throw (IndexOutOfBoundsException) { if (pos > alphabet_.size()) throw IndexOutOfBoundsException("AbstractAlphabet::setState: incorect position", pos, 0, alphabet_.size()); // Delete the state if not empty if (alphabet_[pos] != 0) delete alphabet_[pos]; // Put the state in the vector alphabet_[pos] = st.clone(); // Update the maps updateMaps_(pos, st); } /******************************************************************************/ const AlphabetState& AbstractAlphabet::getState(const std::string& letter) const throw (BadCharException) { map::const_iterator it = letters_.find(letter); if (it == letters_.end()) throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this); return * (alphabet_[it->second]); } /******************************************************************************/ const AlphabetState& AbstractAlphabet::getState(int num) const throw (BadIntException) { map::const_iterator it = nums_.find(num); if (it == nums_.end()) throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this); return * (alphabet_[it->second]); } 
/******************************************************************************/ AlphabetState& AbstractAlphabet::getStateAt(size_t pos) throw (IndexOutOfBoundsException) { if (pos > alphabet_.size()) throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorect position", pos, 0, alphabet_.size()); return * (alphabet_[pos]); } /******************************************************************************/ const AlphabetState& AbstractAlphabet::getStateAt(size_t pos) const throw (IndexOutOfBoundsException) { if (pos > alphabet_.size()) throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorect position", pos, 0, alphabet_.size()); return * (alphabet_[pos]); } /******************************************************************************/ std::string AbstractAlphabet::getName(const std::string& state) const throw (BadCharException) { return (getState(state)).getName(); } /******************************************************************************/ std::string AbstractAlphabet::getName(int state) const throw (BadIntException) { return (getState(state)).getName(); } /******************************************************************************/ int AbstractAlphabet::charToInt(const std::string& state) const throw (BadCharException) { return getState(state).getNum(); } /******************************************************************************/ std::string AbstractAlphabet::intToChar(int state) const throw (BadIntException) { return (getState(state)).getLetter(); } /******************************************************************************/ bool AbstractAlphabet::isIntInAlphabet(int state) const { map::const_iterator it = nums_.find(state); if (it != nums_.end()) return true; return false; } /******************************************************************************/ bool AbstractAlphabet::isCharInAlphabet(const std::string& state) const { map::const_iterator it = letters_.find(state); if (it != letters_.end()) return true; 
return false; } /******************************************************************************/ std::vector AbstractAlphabet::getAlias(int state) const throw (BadIntException) { if (!isIntInAlphabet(state)) throw BadIntException(state, "AbstractAlphabet::getAlias(int): Specified base unknown."); vector v(1); v[0] = state; return v; } /******************************************************************************/ std::vector AbstractAlphabet::getAlias(const std::string& state) const throw (BadCharException) { if (!isCharInAlphabet(state)) throw BadCharException(state, "AbstractAlphabet::getAlias(char): Specified base unknown."); vector v(1); v[0] = state; return v; } /******************************************************************************/ int AbstractAlphabet::getGeneric(const std::vector& states) const throw (BadIntException) { map m; for (unsigned int i = 0 ; i < states.size() ; ++i) { vector tmp_s = this->getAlias(states[i]); // get the states for generic characters for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) { m[tmp_s[j]] ++; // add each state to the list } } vector ve = MapTools::getKeys(m); string key; for (unsigned int i = 0 ; i < ve.size() ; ++i) { if (!isIntInAlphabet(ve[i])) throw BadIntException(ve[i], "AbstractAlphabet::getGeneric(const vector): Specified base unknown."); key += "_" + TextTools::toString(ve[i]); } int v; if (ve.size() == 1) { v = ve[0]; } else { v = this->getUnknownCharacterCode(); } return v; } /******************************************************************************/ std::string AbstractAlphabet::getGeneric(const std::vector& states) const throw (AlphabetException) { map m; for (unsigned int i = 0 ; i < states.size() ; ++i) { vector tmp_s = this->getAlias(states[i]); // get the states for generic characters for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) { m[tmp_s[j]] ++; // add each state to the list } } vector ve = MapTools::getKeys(m); string key; for (unsigned int i = 0 ; i < ve.size() ; ++i) { if 
(!isCharInAlphabet(ve[i])) throw BadCharException(ve[i], "AbstractAlphabet::getAlias(const vector): Specified base unknown."); key += TextTools::toString(ve[i]); } string v; if (ve.size() == 1) { v = ve[0]; } else { throw CharStateNotSupportedException("AbstractAlphabet::getAlias(const vector): No generic char state."); } return v; } /******************************************************************************/ const std::vector& AbstractAlphabet::getSupportedInts() const { if(intList_.size() == 0) { intList_.resize(alphabet_.size()); charList_.resize(alphabet_.size()); for(unsigned int i = 0; i < alphabet_.size(); i++) { intList_[i] = alphabet_[i]->getNum(); charList_[i] = alphabet_[i]->getLetter(); } } return intList_; } /******************************************************************************/ const std::vector& AbstractAlphabet::getSupportedChars() const { if(charList_.size() == 0) { intList_.resize(alphabet_.size()); charList_.resize(alphabet_.size()); for(unsigned int i = 0; i < alphabet_.size(); i++) { intList_[i] = alphabet_[i]->getNum(); charList_[i] = alphabet_[i]->getLetter(); } } return charList_; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp000644 000000 000000 00000004637 12147656566 025605 0ustar00rootroot000000 000000 // // File: YeastbrateMitochondrialCodonAlphabet.cpp // Created by: Benoit Nabholz // Created on: Sun Oct 10 14:33 CET 2010 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "YeastMitochondrialCodonAlphabet.h" using namespace bpp; using namespace std; YeastMitochondrialCodonAlphabet::YeastMitochondrialCodonAlphabet(const NucleicAlphabet * alpha) : CodonAlphabet(alpha) { string A = alpha->intToChar(0); string G = alpha->intToChar(2); string T = alpha->intToChar(3); vector vstop; vstop.push_back(T + A + A); vstop.push_back(T + A + G); int istop; unsigned int j; for (unsigned int i = 0 ; i < vstop.size() ; i++) { istop = charToInt(vstop[i]); stopCodons_.push_back(istop); j = 0; while (j < getNumberOfChars()) { if (getStateAt(j).getNum() == istop) { getStateAt(j).setName(STOP); break; } j++; } } } bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AbstractAlphabet.h000644 000000 000000 00000016452 12147656566 022226 0ustar00rootroot000000 000000 // // File: AbstractAlphabet.h // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTALPHABET_H_ #define _ABSTRACTALPHABET_H_ #include "Alphabet.h" #include "AlphabetState.h" #include // From the STL: #include #include #include namespace bpp { /** * @brief A partial implementation of the Alphabet interface. * * It contains a vector of AlphabetState. * All methods are based upon this vector * but do not provide any method to initialize it. * This is up to each constructor of the derived classes. * * NOTE(review): the destructor deletes the elements of alphabet_, but no copy * constructor or assignment operator is declared in this class; confirm that * instances are never copied or that copying is handled elsewhere. * * @see Alphabet */ class AbstractAlphabet: public Alphabet { private: /** * @brief Alphabet: vector of AlphabetState. * * The elements are deleted by the destructor of this class. */ std::vector alphabet_; /** * @name Maps used for quick lookup by letter and by num. * @{ */ std::map letters_; std::map nums_; /** @} */ /** * @brief Update the private maps letters_ and nums_ when adding a state. * * @param pos The index of the state in the alphabet_ vector. * @param st The state that has been added or modified */ void updateMaps_(size_t pos, const AlphabetState& st); protected: /** * @name Available codes * * These vectors will be computed the first time you call the getSupportedInts or getSupportedChars method.
* * @{ */ mutable std::vector charList_; mutable std::vector intList_; /** @} */ public: /** * @brief Build an alphabet with no state; every member starts empty. */ AbstractAlphabet(): alphabet_(), letters_(), nums_(), charList_(), intList_() {} /** * @brief Delete every AlphabetState pointed to by alphabet_. */ virtual ~AbstractAlphabet() { for (unsigned int i = 0 ; i < alphabet_.size() ; i++) delete alphabet_[i]; } public: /** * @name Implement these methods from the Alphabet interface. * * @{ */ unsigned int getNumberOfChars() const { return static_cast(alphabet_.size()); } std::string getName(const std::string& state) const throw (BadCharException); std::string getName(int state) const throw (BadIntException); int charToInt(const std::string& state) const throw (BadCharException); std::string intToChar(int state) const throw (BadIntException); bool isIntInAlphabet(int state) const; bool isCharInAlphabet(const std::string& state) const; std::vector getAlias(int state) const throw (BadIntException); std::vector getAlias(const std::string& state) const throw (BadCharException); int getGeneric(const std::vector& states) const throw (BadIntException); std::string getGeneric(const std::vector& states) const throw (AlphabetException); const std::vector& getSupportedInts() const; const std::vector& getSupportedChars() const; /** @return -1, the int code used for gaps by this implementation. */ int getGapCharacterCode() const { return -1; } /** @return true if state equals -1. */ bool isGap(int state) const { return state == -1; } /** @return true if charToInt(state) equals -1. */ bool isGap(const std::string& state) const { return charToInt(state) == -1; } /** @} */ /** * @name Specific methods to access AlphabetState * @{ */ /** * @brief Get a state by its letter. * * This method must be overloaded in specialized classes to send back * a reference of the correct type. * * @param letter The letter of the state to find. * @throw BadCharException If the letter is not in the Alphabet. */ const AlphabetState& getState(const std::string& letter) const throw (BadCharException); /** * @brief Get a state by its num. * * This method must be overloaded in specialized classes to send back * a reference of the correct type. * * @param num The num of the state to find.
* @throw BadIntException If the num is not in the Alphabet. */ const AlphabetState& getState(int num) const throw (BadIntException); /** @} */ protected: /** * @brief Add a state to the Alphabet. * * @param st The state to add. */ virtual void registerState(const AlphabetState& st); /** * @brief Set a state in the Alphabet. * * @param pos The index of the state in the alphabet_ vector. * @param st The new state to put in the Alphabet. */ virtual void setState(size_t pos, const AlphabetState& st) throw (IndexOutOfBoundsException); /** * @brief Resize the private alphabet_ vector. * * @param size The new size of the Alphabet. */ void resize(unsigned int size) { alphabet_.resize(size); } /** * @brief Get a state at a position in the alphabet_ vector. * * This method must be overloaded in specialized classes to send back * a reference of the correct type. * * @param pos The index of the state in the alphabet_ vector. * @throw IndexOutOfBoundsException If pos is out of the vector. */ virtual AlphabetState& getStateAt(size_t pos) throw (IndexOutOfBoundsException); /** * @brief Get a state at a position in the alphabet_ vector. * * This method must be overloaded in specialized classes to send back * a reference of the correct type. * * @param pos The index of the state in the alphabet_ vector. * @throw IndexOutOfBoundsException If pos is out of the vector. */ virtual const AlphabetState& getStateAt(size_t pos) const throw (IndexOutOfBoundsException); /** * @brief Re-update the maps using the alphabet_ vector content. * * Calls updateMaps_ once for every element of alphabet_, in index order. */ void remap() { for (size_t i = 0 ; i < alphabet_.size() ; i++) { updateMaps_(i, * alphabet_[i]); } } /** @return 1: each state is coded by a string of size 1 in this implementation. */ unsigned int getStateCodingSize() const { return 1; } }; } //end of namespace bpp.
#endif // _ABSTRACTALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/Alphabet.h000644 000000 000000 00000035114 12147656566 020536 0ustar00rootroot000000 000000 // // File: Alphabet.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _ALPHABET_H_ #define _ALPHABET_H_ #include #include #include "AlphabetExceptions.h" #include "AlphabetState.h" /** * @mainpage * * @par * This library provides classes to store and analyse biological sequences. * Each position in a sequence is coded by an int. An object implementing the bpp::Alphabet * interface is used to make the relation between the int code and its more common character * representation. Support for DNA, RNA, protein and codon sequences is provided. The * bpp::AlphabetTools class provides tools to deal with Alphabet objects. * The basic bpp::Sequence class contains the code sequence, a name for the sequence * and optionally comments. More elaborate classes can be built by inheriting from this class. * The bpp::SequenceTools static class provides simple analysis tools, like base frequency measures, * concatenation, etc. * * @par * SeqLib also provides tools to perform in silico molecular biology, like complementation, * transcription, translation, etc. All these methods are particular cases of alphabet translation, and are implemented * via the interface bpp::Translator. Of particular interest are the classes bpp::NucleicAcidsReplication, * bpp::DNAToRNA and bpp::GeneticCode + derivatives. * * @par * Sequence collections are stored as containers. The simplest container implements the * bpp::SequenceContainer interface, providing access to sequences by their name. * The bpp::OrderedSequenceContainer adds access by position in the container. * The simplest implementation of this interface is the bpp::VectorSequenceContainer, which stores the sequences * as a vector of bpp::Sequence objects (or instances inheriting from this class). * Input/output from various file formats is provided, including fasta (bpp::Fasta), GenBank (bpp::GenBank) and Mase (bpp::Mase). * Tools dealing with containers can be found in the bpp::SequenceContainerTools static class.
* * @par * Support for alignments is provided via the bpp::SiteContainer interface, which enables site access. * Sites are stored as a distinct class, similar to a "vertical" sequence, called bpp::Site. * It shares several methods with the bpp::Sequence object, although it does not contain a name but * a position attribute. This attribute can be used to track the position of sites when handling * alignments (for instance after removing all gap-containing sites). * There are currently two implementations of the bpp::SiteContainer interface: * - bpp::AlignedSequenceContainer, inheriting from bpp::VectorSequenceContainer and adding the site access. * Sequence access is hence in @f$o(1)@f$ and site access in @f$o(n)@f$, n being the total number of sites. * - bpp::VectorSiteContainer is a symmetric implementation, storing the data as a vector of bpp::Site objects, * providing site access in @f$o(1)@f$ but sequence access in @f$o(m)@f$, m being the total number of sequences. * The static classes bpp::SiteTools and bpp::SiteContainerTools provide some tools (for instance: pairwise alignment) to deal respectively with * bpp::Site and bpp::SiteContainer objects. I/O is provided for several formats, including Clustal (bpp::Clustal) and Phylip (bpp::Phylip). * * @par * Bio++ SeqLib also contains support for sequence properties, like amino-acids biochemical properties. * The interfaces bpp::AlphabetIndex1 and bpp::AlphabetIndex2 provide methods to deal with indices in 1 and 2 dimensions * respectively. Several basic properties are provided, together with input from the AAIndex databases. */ namespace bpp { /** * @brief The Alphabet interface. * * An alphabet object defines all the states allowed for a particular type of * sequence. These states are coded as a string and an integer. * The string description is the one found in the text (human-readable) * description of sequences, typically in sequence files.
* However, for computational needs, it is often more efficient to store the sequences as * a vector of integers. * The link between the two descriptions is made via * the Alphabet classes, and the two methods intToChar() and charToInt(). * The Alphabet interface also provides other methods, like getting the full name * of the states and so on. * * The alphabet objects may throw several exceptions derived from the AlphabetException * class. * * @see AlphabetException, BadCharException, BadIntException */ class Alphabet { public: Alphabet() {} virtual ~Alphabet() {} public: /** * @brief Get the complete name of a state given its int description. * * In case of several states with identical number (i.e. N and X for nucleic alphabets), * this method returns the name of the first found in the vector. * * @param state The int description of the given state. * @return The name of the state. * @throw BadIntException When state is not a valid integer. */ virtual std::string getName(int state) const throw (BadIntException) = 0; /** * @brief Get the complete name of a state given its string description. * * In case of several states with identical number (i.e. N and X for nucleic alphabets), * this method will return the name of the first found in the vector. * * @param state The string description of the given state. * @return The name of the state. * @throw BadCharException When state is not a valid char description. */ virtual std::string getName(const std::string& state) const throw (BadCharException) = 0; /** * @name = Tests * * @{ */ /** * @brief Tell if a state (specified by its int description) is allowed by * the alphabet. * * @param state The int description. * @return 'true' if the state is known. */ virtual bool isIntInAlphabet(int state) const = 0; /** * @brief Tell if a state (specified by its string description) is allowed by * the alphabet. * * @param state The string description. * @return 'true' if the state is known.
*/ virtual bool isCharInAlphabet(const std::string& state) const = 0; /** @} */ /** * @name State access * * @{ */ /** * @brief Get a state given its int description. * * @param state The int description. * @return The AlphabetState. * @throw BadIntException When state is not a valid integer. */ virtual const AlphabetState& getState(int state) const throw (BadIntException) = 0; /** * @brief Get a state given its string description. * * @param state The string description. * @return The AlphabetState. * @throw BadCharException When state is not a valid string. */ virtual const AlphabetState& getState(const std::string& state) const throw (BadCharException) = 0; /** @} */ /** * @name Conversion methods * * @{ */ /** * @brief Give the string description of a state given its int description. * * @param state The int description. * @return The string description. * @throw BadIntException When state is not a valid integer. */ virtual std::string intToChar(int state) const throw (BadIntException) = 0; /** * @brief Give the int description of a state given its string description. * * @param state The string description. * @return The int description. * @throw BadCharException When state is not a valid char description. */ virtual int charToInt(const std::string& state) const throw (BadCharException) = 0; /** @} */ /** * @name Sizes. * * @{ */ /** * @brief Get the number of supported characters in this alphabet, * including generic characters (e.g. return 20 for DNA alphabet). * * @return The total number of supported character descriptions. */ virtual unsigned int getNumberOfChars() const = 0; /** * @brief Get the number of distinct states in alphabet (e.g. return 15 for DNA alphabet). * This is the number of integers used for state description. * * @return The number of distinct states. */ virtual unsigned int getNumberOfTypes() const = 0; /** * @brief Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet).
* This is the method you'll need in most cases. * * @return The number of resolved states. */ virtual unsigned int getSize() const = 0; /** @} */ /** * @name Utility methods * * @{ */ /** * @brief Get all resolved states that match a generic state. * * If the given state is not a generic code then the output vector will contain this unique code. * * @param state The alias to resolve. * @return A vector of resolved states. * @throw BadIntException When state is not a valid integer. */ virtual std::vector getAlias(int state) const throw (BadIntException) = 0; /** * @brief Get all resolved states that match a generic state. * * If the given state is not a generic code then the output vector will contain this unique code. * * @param state The alias to resolve. * @return A vector of resolved states. * @throw BadCharException When state is not a valid char description. */ virtual std::vector getAlias(const std::string& state) const throw (BadCharException) = 0; /** * @brief Get the generic state that matches a set of states. * * If the given states contain generic codes, each generic code is first resolved and then the new generic state is returned. * If only a single resolved state is given, the function returns this state. * * @param states A vector of states to resolve. * @return An int code for the computed state. * @throw BadIntException When a state is not a valid integer. */ virtual int getGeneric(const std::vector& states) const throw (BadIntException) = 0; /** * @brief Get the generic state that matches a set of states. * * If the given states contain generic codes, each generic code is first resolved and then the new generic state is returned. * If only a single resolved state is given, the function returns this state. * * @param states A vector of states to resolve. * @return A string code for the computed state. * @throw BadCharException when a state is not a valid char description.
* @throw CharStateNotSupportedException when the alphabet does not support Char state for unresolved state. */ virtual std::string getGeneric(const std::vector& states) const throw (AlphabetException) = 0; /** * @return A list of all supported int codes. * * Note for developers of new alphabets: * we return a const reference here since the list is supposed to be * stored within the class and should not be modified outside the class. */ virtual const std::vector& getSupportedInts() const = 0; /** * @return A list of all supported character codes. * * Note for developers of new alphabets: * we return a const reference here since the list is supposed to be * stored within the class and should not be modified outside the class. */ virtual const std::vector& getSupportedChars() const = 0; /** * @return The int code for unknown characters. */ virtual int getUnknownCharacterCode() const = 0; /** * @return The int code for gap characters. */ virtual int getGapCharacterCode() const = 0; /** * @param state The state to test. * @return 'True' if the state is a gap. */ virtual bool isGap(int state) const = 0; /** * @param state The state to test. * @return 'True' if the state is a gap. */ virtual bool isGap(const std::string& state) const = 0; /** * @param state The state to test. * @return 'True' if the state is unresolved. */ virtual bool isUnresolved(int state) const = 0; /** * @param state The state to test. * @return 'True' if the state is unresolved. */ virtual bool isUnresolved(const std::string& state) const = 0; /** @} */ /** * @brief Identification method. * * Used to tell if two alphabets describe the same type of sequences. * For instance, this method is used by sequence containers to compare two alphabets and * allow or deny addition of sequences. * * @return A text describing the alphabet. */ virtual std::string getAlphabetType() const = 0; /** * @brief Get the size of the string coding a state. * @return The size of the string coding each state in the Alphabet.
* @author Sylvain Gaillard */ virtual unsigned int getStateCodingSize() const = 0; }; } //end of namespace bpp. #endif // _ALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/DNA.h000644 000000 000000 00000005522 12147656566 017420 0ustar00rootroot000000 000000 // // File: DNA.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Tue Jul 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/ #ifndef _DNA_H_ #define _DNA_H_ #include "NucleicAlphabet.h" //From the STL: #include #include namespace bpp { /** * @brief This alphabet is used to deal with DNA sequences. * * It supports all 4 nucleotides (A, T, G and C) with their standard denomination. * Gaps are coded by '-', unresolved characters are coded by 'X, N, O, 0 or ?'. * Extensive support for generic characters (e.g. 'P', 'Y', etc.) is provided. */ class DNA: public NucleicAlphabet { public: /** * @brief Build the DNA alphabet. * * @param exclamationMarkCountsAsGap If yes, '!' characters are replaced by gaps. * Otherwise, they are counted as unknown characters. */ DNA(bool exclamationMarkCountsAsGap = false); virtual ~DNA() {} public: /** * @name Alias and generic-state resolution. * * These four methods are only declared in this header; their definitions are not part of it. * * @{ */ std::vector getAlias(int state) const throw (BadIntException); std::vector getAlias(const std::string& state) const throw (BadCharException); int getGeneric(const std::vector& states) const throw (BadIntException); std::string getGeneric(const std::vector& states) const throw (BadCharException); /** @} */ /** * @return The fixed string "DNA alphabet". */ std::string getAlphabetType() const { return "DNA alphabet"; } }; } //end of namespace bpp. #endif // _DNA_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/LetterAlphabet.h000644 000000 000000 00000007346 12147656566 021724 0ustar00rootroot000000 000000 // // File: LetterAlphabet.h // Author: Sylvain Gaillard // Created: 11/09/2009 14:31:05 // /* Copyright or © or Copr. Bio++ Development Team, (September 11, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _LETTERALPHABET_ #define _LETTERALPHABET_ // From the STL #include #include #include // From Seq #include "AbstractAlphabet.h" namespace bpp { /** * @brief Specialized partial implementation of Alphabet using single letters. 
* * @author Sylvain Gaillard */ class LetterAlphabet: public AbstractAlphabet { private: static const int LETTER_UNDEF_VALUE = -99; std::vector letters_; bool caseSensitive_; public: LetterAlphabet(bool caseSensitive = false): letters_(256, LETTER_UNDEF_VALUE), caseSensitive_(caseSensitive) {} virtual ~LetterAlphabet() {} public: bool isCharInAlphabet(char state) const { return letters_[static_cast(state)] != LETTER_UNDEF_VALUE; } bool isCharInAlphabet(const std::string& state) const { return isCharInAlphabet(state[0]); } int charToInt(const std::string &state) const throw (BadCharException) { if (!isCharInAlphabet(state)) throw BadCharException(state, "LetterAlphabet::charToInt: Unknown state", this); return letters_[static_cast(state[0])]; } protected: void registerState(const AlphabetState& st) { AbstractAlphabet::registerState(st); if (caseSensitive_) { letters_[static_cast(st.getLetter()[0])] = st.getNum(); } else { letters_[static_cast(tolower(st.getLetter()[0]))] = st.getNum(); letters_[static_cast(toupper(st.getLetter()[0]))] = st.getNum(); } } void setState(size_t pos, const AlphabetState& st) throw (IndexOutOfBoundsException) { AbstractAlphabet::setState(pos, st); if (caseSensitive_) { letters_[static_cast(st.getLetter()[0])] = st.getNum(); } else { letters_[static_cast(tolower(st.getLetter()[0]))] = st.getNum(); letters_[static_cast(toupper(st.getLetter()[0]))] = st.getNum(); } } }; } #endif // _LETTERALPHABET_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/AlphabetTools.cpp000644 000000 000000 00000011043 12147656566 022105 0ustar00rootroot000000 000000 // // File: AlphabetTools.cpp // Created by: Julien Dutheil // Created on: Fri Oct 10 17:27:39 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AlphabetTools.h" #include using namespace bpp; // From the STL: #include #include using namespace std; /**********************************************************************************************/ const DNA AlphabetTools::DNA_ALPHABET; const RNA AlphabetTools::RNA_ALPHABET; const ProteicAlphabet AlphabetTools::PROTEIN_ALPHABET; const DefaultAlphabet AlphabetTools::DEFAULT_ALPHABET; /**********************************************************************************************/ int AlphabetTools::getType(char state) { if (state == '-') return -1; state = static_cast(toupper(static_cast(state))); // toupper works on int bool d = DNA_ALPHABET.isCharInAlphabet(TextTools::toString(state)); bool r = RNA_ALPHABET.isCharInAlphabet(TextTools::toString(state)); bool p = PROTEIN_ALPHABET.isCharInAlphabet(TextTools::toString(state)); if (!d && !r && !p) return 0; // Unknown character else if (d && !r && !p) return 1; // DNA specific else if (!d && r && !p) return 2; // RNA specific else if (!d && !r && p) return 3; // Protein specific else if (d && r && !p) return 4; // Nucleotide specific else if (d && !r && p) return 5; // DNA or Protein specific else if (!d && r && p) return 6; // RNA or Protein specific else return 7; // Non-specific character } /**********************************************************************************************/ bool AlphabetTools::checkAlphabetCodingSize(const Alphabet& alphabet) throw (AlphabetException) { if (alphabet.getNumberOfChars() == 0) return true; // Will this really happen? 
size_t size = alphabet.intToChar(0).size(); for (unsigned int i = 1; i < alphabet.getNumberOfTypes(); i++) { if (alphabet.intToChar(i).size() != size) return false; } return true; } /**********************************************************************************************/ bool AlphabetTools::checkAlphabetCodingSize(const Alphabet* alphabet) throw (AlphabetException) { return checkAlphabetCodingSize(*alphabet); } /**********************************************************************************************/ unsigned int AlphabetTools::getAlphabetCodingSize(const Alphabet& alphabet) throw (AlphabetException) { if (!checkAlphabetCodingSize(alphabet)) throw AlphabetException("Bad alphabet in function Alphabet::getAlphabetCodingSize()."); return static_cast(alphabet.intToChar(0).size()); } /**********************************************************************************************/ unsigned int AlphabetTools::getAlphabetCodingSize(const Alphabet* alphabet) throw (AlphabetException) { return getAlphabetCodingSize(*alphabet); } /**********************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/CodonAlphabet.h000644 000000 000000 00000016645 12147656566 021531 0ustar00rootroot000000 000000 // // File: CodonAlphabet.h // Created by: Julien Dutheil // Created on: Sun Oct 12 17:41:56 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _CODONALPHABET_H_ #define _CODONALPHABET_H_ #include "WordAlphabet.h" #include "NucleicAlphabet.h" // From the STL: #include namespace bpp { /** * @brief The abstract base class for codon alphabets. * @author Laurent Guéguen * * Since codons are made of 3 nucleic bases (RNA or DNA), this class * has a NucleicAlphabet field used to check char description. This * nucleic alphabet is passed to the constructor. This class also adds * some methods specific to codon manipulation. */ class CodonAlphabet: public WordAlphabet { protected: /** *@brief the vector of the numbers of the stop codon states. */ std::vector stopCodons_; //Constant used for init codon: int initCodon_; public: //Constant used for stop codons: static const std::string STOP; public: // Constructor and destructor. /** * @brief Builds a new codon alphabet from a nucleic alphabet. * * @param alpha The nucleic alphabet to be used. 
*/ CodonAlphabet(const NucleicAlphabet* alpha) : WordAlphabet(alpha, 3), stopCodons_(), initCodon_() {} virtual ~CodonAlphabet() {} virtual std::string getAlphabetType() const = 0; public: /** * @name Codon specific methods * * @{ */ /** * @brief Returns the number of stop codons */ unsigned int numberOfStopCodons() const; /** * @brief Returns the vector of the numbers of the stop codon states. */ const std::vector& stopCodons() const { return stopCodons_;} /** * @brief Get the int code for a codon given the int code of the three underlying positions. * * The int code of each position must match the nucleic alphabet specified for this alphabet. * @param pos1 Int description for position 1. * @param pos2 Int description for position 2. * @param pos3 Int description for position 3. * @return The int code of the codon. */ virtual int getCodon(int pos1, int pos2, int pos3) const throw (BadIntException); /** * @brief Get the char code for a codon given the char code of the three underlying positions. * * The char code of each position must match the nucleic alphabet specified for this alphabet. * NB: This performs pos1 + pos2 + pos3 after checking for each position validity. * @param pos1 Char description for position 1. * @param pos2 Char description for position 2. * @param pos3 Char description for position 3. * @return The Char code of the codon. */ virtual std::string getCodon(const std::string& pos1, const std::string& pos2, const std::string& pos3) const throw (BadCharException); /** * @brief Get the int code of the first position of a codon given its int description. * * @param codon The int description of the codon. * @return The int description of the first position of the codon. */ virtual int getFirstPosition(int codon) const throw (BadIntException); /** * @brief Get the int code of the second position of a codon given its int description. * * @param codon The int description of the codon. * @return The int description of the second position of the codon. 
*/ virtual int getSecondPosition(int codon) const throw (BadIntException); /** * @brief Get the int code of the third position of a codon given its int description. * * @param codon The int description of the codon. * @return The int description of the third position of the codon. */ virtual int getThirdPosition(int codon) const throw (BadIntException); /** * @brief Get the char code of the first position of a codon given its char description. * * @param codon The char description of the codon. * @return The char description of the first position of the codon. */ virtual std::string getFirstPosition (const std::string& codon) const throw (BadCharException); /** * @brief Get the char code of the second position of a codon given its char description. * * @param codon The char description of the codon. * @return The char description of the second position of the codon. */ virtual std::string getSecondPosition(const std::string& codon) const throw (BadCharException); /** * @brief Get the char code of the third position of a codon given its char description. * * @param codon The char description of the codon. * @return The char description of the third position of the codon. */ virtual std::string getThirdPosition(const std::string& codon) const throw (BadCharException); /** * @brief Tell whether a particular codon is a stop codon. * * @param codon The int description of the codon to test. * @return True if the codon is a stop codon. */ bool isStop(int codon) const { return (getName(intToChar(codon)) == STOP); } /** * @brief Tell whether a particular codon is a stop codon. * * @param codon The char description of the codon to test. * @return True if the codon is a stop codon. */ bool isStop(const std::string& codon) const { return (getName(codon) == STOP); } /** * @brief Tell whether a particular codon is the init codon. * * @param codon The int description of the codon to test. * @return True if the codon is the init codon. 
*/ bool isInit(int codon) const { return (codon == initCodon_); } /** * @brief Tell whether a particular codon is the init codon. * * @param codon The char description of the codon to test. * @return True if the codon is a init codon. */ bool isInit(const std::string& codon) const { return (charToInt(codon) == initCodon_); } /** * @return The nucleic alphabet associated to this codon alphabet. */ virtual const NucleicAlphabet* const getNucleicAlphabet() const { return dynamic_cast(vAbsAlph_[0]); } /** @} */ }; } //end of namespace bpp. #endif //_CODONALPHABET_H_ bpp-seq-2.1.0/src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp000644 000000 000000 00000004216 12147656566 022375 0ustar00rootroot000000 000000 // // File: DefaultAlphabet.cpp // Authors: Julien Dutheil // Sylvain Gaillard // Created on: 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "DefaultAlphabet.h" #include "AlphabetState.h" #include using namespace bpp; DefaultAlphabet::DefaultAlphabet(): chars_("ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890.?") { // Alphabet size definition resize(static_cast(chars_.size()) + 1); // Alphabet content definition setState(0, AlphabetState(-1, "-", "Gap")); for (size_t i = 0; i < chars_.size(); i++) { setState(i + 1, AlphabetState(static_cast(i), TextTools::toString(chars_[i]), "")); } } bpp-seq-2.1.0/src/Bpp/Seq/Site.cpp000644 000000 000000 00000007236 12147656566 016541 0ustar00rootroot000000 000000 // // File Site.cpp // Author: Julien Dutheil // Guillaume Deuchst // Created on: Tuesday August 7 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Site.h" #include "StringSequenceTools.h" using namespace bpp; // From the STL: #include using namespace std; /****************************************************************************************/ Site::Site(const Alphabet* alpha) : BasicSymbolList(alpha), position_(0) {} Site::Site(const Alphabet* alpha, int position) : BasicSymbolList(alpha), position_(position) {} Site::Site(const vector& site, const Alphabet* alpha) throw (BadCharException) : BasicSymbolList(site, alpha), position_(0) {} Site::Site(const vector& site, const Alphabet* alpha, int position) throw (BadCharException) : BasicSymbolList(site, alpha), position_(position) {} Site::Site(const vector& site, const Alphabet* alpha) throw (BadIntException) : BasicSymbolList(site, alpha), position_(0) {} Site::Site(const vector& site, const Alphabet* alpha, int position) throw (BadIntException) : BasicSymbolList(site, alpha), position_(position) {} /****************************************************************************************/ Site::Site(const Site& site): BasicSymbolList(site), position_(site.getPosition()) {} Site& Site::operator=(const Site& s) { SymbolList::operator=(s); content_ = s.getContent(); 
position_ = s.getPosition(); return *this; } /****************************************************************************************/ bool operator==(const Site& site1, const Site& site2) { // Verify that site's size, position and content are equals if(site1.size() != site2.size()) return false; if(site1.getPosition() != site2.getPosition()) return false; else { for(unsigned int i = 0; i < site1.size(); i++) { if(site1[i] != site2[i]) return false; } return true; } } /****************************************************************************************/ bool operator<(const Site& site1, const Site& site2) { return site1.getPosition() < site2.getPosition(); } /****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithQualityTools.cpp000644 000000 000000 00000017175 12147656566 022636 0ustar00rootroot000000 000000 // // File: SequenceWithQualityTools.h // Authors: Vincent Cahais // Sylvain Gaillard // Created on: 16 Apr 2010 // /* Copyright or © or Copr. Bio++ Development Team, (Apr 16, 2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SequenceWithQualityTools.h" using namespace bpp; using namespace std; DNA SequenceWithQualityTools::_DNA; RNA SequenceWithQualityTools::_RNA; NucleicAcidsReplication SequenceWithQualityTools::_DNARep(& _DNA, & _DNA); NucleicAcidsReplication SequenceWithQualityTools::_RNARep(& _RNA, & _RNA); NucleicAcidsReplication SequenceWithQualityTools::_transc(& _DNA, & _RNA); /******************************************************************************/ SequenceWithQuality * SequenceWithQualityTools::subseq(const SequenceWithQuality & sequence, unsigned int begin, unsigned int end) throw (IndexOutOfBoundsException, Exception) { // Checking interval if (end >= sequence.size()) throw IndexOutOfBoundsException ("SequenceTools::subseq : Invalid upper bound", end, 0, sequence.size()); if (end < begin) throw Exception ("SequenceTools::subseq : Invalid interval"); // Copy sequence vector temp(sequence.getContent()); vector qualtemp(sequence.getQualities()); // Truncate sequence temp.erase(temp.begin() + end + 1, temp.end()); temp.erase(temp.begin(), temp.begin() + begin); qualtemp.erase(qualtemp.begin() + end + 1, qualtemp.end()); qualtemp.erase(qualtemp.begin(), qualtemp.begin() + 
begin); // New sequence creation return new SequenceWithQuality(sequence.getName(), temp, qualtemp, sequence.getComments(), sequence.getAlphabet()); } /******************************************************************************/ SequenceWithQuality* SequenceWithQualityTools::concatenate(const SequenceWithQuality& seqwq1, const SequenceWithQuality& seqwq2) throw (AlphabetMismatchException, Exception) { // Sequence's alphabets matching verification if ((seqwq1.getAlphabet()->getAlphabetType()) != (seqwq2.getAlphabet()->getAlphabetType())) throw AlphabetMismatchException("SequenceTools::concatenate : Sequence's alphabets don't match ", seqwq1.getAlphabet(), seqwq2.getAlphabet()); // Sequence's names matching verification if (seqwq1.getName() != seqwq2.getName()) throw Exception ("SequenceTools::concatenate : Sequence's names don't match"); // Concatenate sequences and send result vector sequence = seqwq1.getContent(); vector sequence2 = seqwq2.getContent(); vector qualities = seqwq1.getQualities(); vector qualities2 = seqwq2.getQualities(); sequence.insert(sequence.end(), sequence2.begin(), sequence2.end()); qualities.insert(qualities.end(), qualities2.begin(), qualities2.end()); return new SequenceWithQuality(seqwq1.getName(), sequence, qualities, seqwq1.getComments(), seqwq1.getAlphabet()); } /******************************************************************************/ SequenceWithQuality* SequenceWithQualityTools::complement(const SequenceWithQuality& sequence) throw (AlphabetException) { // Alphabet type checking NucleicAcidsReplication * NAR; if (sequence.getAlphabet()->getAlphabetType() == "DNA alphabet") { NAR = &_DNARep; } else if(sequence.getAlphabet()->getAlphabetType() == "RNA alphabet") { NAR = &_RNARep; } else { throw AlphabetException ("SequenceTools::complement : Sequence must be nucleic.", sequence.getAlphabet()); } Sequence * seq = NAR->translate(sequence); SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), 
seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); delete seq; return seqwq; } /******************************************************************************/ SequenceWithQuality* SequenceWithQualityTools::transcript(const SequenceWithQuality& sequence) throw (AlphabetException) { // Alphabet type checking if (sequence.getAlphabet()->getAlphabetType() != "DNA alphabet") { throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet()); } Sequence * seq = _transc.translate(sequence); SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); delete seq; return seqwq; } /******************************************************************************/ SequenceWithQuality* SequenceWithQualityTools::reverseTranscript(const SequenceWithQuality& sequence) throw (AlphabetException) { // Alphabet type checking if (sequence.getAlphabet()->getAlphabetType() != "RNA alphabet") { throw AlphabetException ("SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet()); } Sequence * seq = _transc.reverse(sequence); SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); delete seq; return seqwq; } /******************************************************************************/ SequenceWithQuality* SequenceWithQualityTools::invert(const SequenceWithQuality& sequence) { vector iContent(sequence.getContent().rbegin(),sequence.getContent().rend()); vector iQualities(sequence.getQualities().rbegin(),sequence.getQualities().rend()); SequenceWithQuality* iSeq = sequence.clone(); iSeq->setContent(iContent); iSeq->setQualities(iQualities); return iSeq; } /******************************************************************************/ SequenceWithQuality* 
SequenceWithQualityTools::removeGaps(const SequenceWithQuality& seq) { vector content; vector qualities; const Alphabet * alpha = seq.getAlphabet(); for(unsigned int i = 0; i < seq.size(); i++) { if(! alpha->isGap(seq[i])) { content.push_back(seq[i]); qualities.push_back(seq.getQualities()[i]); } } SequenceWithQuality * newSeq = dynamic_cast(seq.clone()); newSeq->setContent(content); newSeq->setQualities(qualities); return newSeq; } /******************************************************************************/ SequenceWithQuality& SequenceWithQualityTools::trimLeft(SequenceWithQuality& seq) { bool badqual = false; while (badqual) { } return seq; } bpp-seq-2.1.0/src/Bpp/Seq/DistanceMatrix.h000644 000000 000000 00000015316 12147656566 020217 0ustar00rootroot000000 000000 // // File: DistanceMatrix.h // Created on: Wed jun 08 10:39 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _DISTANCEMATRIX_H_ #define _DISTANCEMATRIX_H_ // From the STL: #include #include #include #include //DimensionException #include namespace bpp { /** * @brief A Matrix class to store phylogenetic distances. */ class DistanceMatrix: public virtual RowMatrix { private: std::vector names_; public: /** * @brief Build a new distance matrix with specified names. * * The dimension of the matrix will be equal to the number of names * * @param names The names to use. */ DistanceMatrix(const std::vector& names): RowMatrix(names.size(), names.size()), names_(names) { reset(); } /** * @brief Build a new distance matrix with specified size. * * Row names will be named 'Taxon 0', 'Taxon 1', and so on. * * @param n The size of the matrix. 
*/ DistanceMatrix(size_t n): RowMatrix(n, n), names_(n) { resize(n); } virtual ~DistanceMatrix() {} DistanceMatrix(const DistanceMatrix& dist): RowMatrix(dist), names_(dist.names_) {} DistanceMatrix& operator=(const DistanceMatrix& dist) { size_t n = dist.size(); resize(n); for(size_t i = 0; i < n; ++i) { for(size_t j = 0; j < n; ++j) { operator()(i, j) = dist(i, j); } } names_ = dist.names_; return *this; } public: /** * @brief Reset the distance matrix: all distances are set to 0. */ void reset() { size_t n = size(); for (size_t i = 0; i < n; i++) { for (size_t j = 0; j < n; j++) { operator()(i, j) = 0; } } } /** * @return The dimension of the matrix. */ size_t size() const { return names_.size(); } /** * @return The names associated to the matrix. */ const std::vector& getNames() const { return names_; } /** * @return The ith name. * @param i Name index. * @throw IndexOutOfBoundsException If i is not a valid index. */ const std::string& getName(size_t i) const throw (IndexOutOfBoundsException) { if (i >= size()) throw IndexOutOfBoundsException("DistanceMatrix::getName. Invalid indice.", i, 0, size()); return names_[i]; } /** * @brief Set the ith name. * * @param i Name index. * @param name The new name. * @throw IndexOutOfBoundsException If i is not a valid index. */ void setName(size_t i, const std::string& name) throw (IndexOutOfBoundsException) { if (i >= size()) throw IndexOutOfBoundsException("DistanceMatrix::setName. Invalid indice.", i, 0, size()); names_[i] = name; } /** * @brief Set the names associated to the matrix. * * @param names Matrix names. * @throw DimensionException If 'names' have not the same size as the matrix. */ void setNames(const std::vector& names) throw (DimensionException) { if (names.size() != names_.size()) throw DimensionException("DistanceMatrix::setNames. Invalid number of names.", names.size(), names_.size()); names_ = names; } /** * @brief Get the index of a given name. * * @param name The name to look for. 
* @return The position of the name. * @throw Exception If no names are attached to this matrix, or if the name was not found. */ size_t getNameIndex(const std::string& name) const throw (Exception); /** * @brief Change the dimension of the matrix. * * @param n the new dimension of the matrix. */ void resize(size_t n) { RowMatrix::resize(n, n); names_.resize(n); for (size_t i = 0; i < n; ++i) names_[i] = "Taxon " + TextTools::toString(i); reset(); } /** * @brief Access by name. * * @param iName Name 1 (row) * @param jName Name 2 (column) * @return A reference toward the specified distance. * @throw Exception if the matrix has no name of if one of the name do not match existing names. */ virtual const double& operator()(const std::string& iName, const std::string& jName) const throw (Exception) { size_t i = getNameIndex(iName); size_t j = getNameIndex(jName); return operator()(i,j); } /** * @brief Access by name. * * @param iName Name 1 (row) * @param jName Name 2 (column) * @return A reference toward the specified distance. * @throw Exception if the matrix has no name of if one of the name do not match existing names. */ virtual double& operator()(const std::string& iName, const std::string& jName) throw (Exception) { size_t i = getNameIndex(iName); size_t j = getNameIndex(jName); return operator()(i,j); } virtual const double& operator()(size_t i, size_t j) const { return RowMatrix::operator()(i, j); } virtual double& operator()(size_t i, size_t j) { return RowMatrix::operator()(i, j); } }; } //end of namespace bpp. #endif //_DISTANCEMATRIX_H_ bpp-seq-2.1.0/src/Bpp/Seq/SequenceExceptions.h000644 000000 000000 00000010766 12147656566 021116 0ustar00rootroot000000 000000 // // File: SequenceExceptions.h // Created by: Julien Dutheil // Created on: Mon Nov 3 16:35:30 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEEXCEPTIONS_H_ #define _SEQUENCEEXCEPTIONS_H_ #include "Alphabet/Alphabet.h" #include namespace bpp { class Sequence; /** * @brief The sequence exception base class. * * @see Exception */ class SequenceException : public Exception { private: /** * @brief A pointer toward a sequence object. */ const Sequence* sequence_; public: /** * @brief Build a new SequenceException object. * * @param text A message to be passed to the exception hierarchy. * @param seq A const pointer toward the sequence that threw the exception. 
*/ SequenceException(const std::string& text, const Sequence * seq = 0); SequenceException(const SequenceException& se): Exception(se), sequence_(se.sequence_) {} SequenceException& operator=(const SequenceException& se) { Exception::operator=(se); sequence_ = se.sequence_; return *this; } virtual ~SequenceException() throw() {} public: /** * @brief Get the sequence that threw the exception. * * @return A const pointer toward the sequence. */ virtual const Sequence* getSequence() const { return sequence_; } }; /** * @brief Exception thrown when a sequence is found to be empty and it should not. */ class EmptySequenceException : public SequenceException { public: /** * @brief Build a new EmptySequenceException object. * * @param text A message to be passed to the exception hierarchy. * @param seq A const pointer toward the sequence that threw the exception. */ EmptySequenceException(const std::string& text, const Sequence* seq = 0); virtual ~EmptySequenceException() throw() {} }; /** * @brief Exception thrown when a sequence is found to have gap and it should not. */ class SequenceWithGapException : public SequenceException { public: /** * @brief Build a new SequenceWithGapException object. * * @param text A message to be passed to the exception hierarchy. * @param seq A const pointer toward the sequence that threw the exception. */ SequenceWithGapException(const std::string& text, const Sequence* seq = 0); virtual ~SequenceWithGapException() throw() {} }; /** * @brief Exception thrown when a sequence is not align with others. * * Typically, this may occur when you try to add a bad sequence to a site container. */ class SequenceNotAlignedException : public SequenceException { public: /** * @brief Build a new SequenceNotAlignedException object. * * @param text A message to be passed to the exception hierarchy. * @param seq A const pointer toward the sequence that threw the exception. 
*/ SequenceNotAlignedException(const std::string& text, const Sequence* seq); virtual ~SequenceNotAlignedException() throw() {} }; } //end of namespace bpp. #endif //_SEQUENCEEXCEPTIONS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Sequence.h000644 000000 000000 00000035211 12147656566 017044 0ustar00rootroot000000 000000 // // File: Sequence.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _SEQUENCE_H_ #define _SEQUENCE_H_ #include "SymbolList.h" #include "SequenceExceptions.h" // From the STL: #include #include namespace bpp { /** * @brief Declaration of Comments type. * * Comments are defined as a std::vector of std::strings to allow the later creation of a * full Comments class. */ typedef std::vector Comments; /** * @brief The sequence interface. * * This is a general purpose container, containing an ordered list of states. * The states that allowed to be present in the sequence are defined * by an alphabet object. * * Sequence objets also contain a name attribute and potentially several comment lines. * A sequence object is also event-driven, allowing easy extension. * * @see Alphabet */ class Sequence: public virtual SymbolList { public: virtual ~Sequence() {} public: #ifndef NO_VIRTUAL_COV Sequence* clone() const = 0; #endif /** * @name Setting/getting the name of the sequence. * * @{ */ /** * @brief Get the name of this sequence. * * @return This sequence name. */ virtual const std::string& getName() const = 0; /** * @brief Set the name of this sequence. * * @param name The new name of the sequence. */ virtual void setName(const std::string& name) = 0; /** @} */ /** * @name Setting/getting the comments associated to the sequence. * * @{ */ /** * @brief Get the comments associated to this sequence. * * @return The comments of the sequence. */ virtual const Comments& getComments() const = 0; /** * @brief Set the comments associated to this sequence. * * @param comments The new comments of the sequence. */ virtual void setComments(const Comments& comments) = 0; /** @} */ /** * @name Adjusting the size of the sequence. * * @{ */ /** * @brief Set the whole content of the sequence. * * @param sequence The new content of the sequence. * @see The Sequence constructor for information about the way sequences are internaly stored. 
*/ virtual void setContent(const std::string& sequence) throw (BadCharException) = 0; virtual void setContent(const std::vector& list) throw (BadIntException) = 0; virtual void setContent(const std::vector& list) throw (BadCharException) = 0; /** * @brief Set up the size of a sequence from the right side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeR(size_t newSize) = 0; /** * @brief Set up the size of a sequence from the left side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeL(size_t newSize) = 0; /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadIntException If the content does not match the current alphabet. */ virtual void append(const std::vector& content) throw (BadIntException) = 0; /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ virtual void append(const std::vector& content) throw (BadCharException) = 0; /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ virtual void append(const std::string& content) throw (BadCharException) = 0; /** @} */ }; /** * @brief A basic implementation of the Sequence interface. * * This is a general purpose container, containing an ordered list of states. * The states that allowed to be present in the sequence are defined * by an alphabet object, which is passed to the sequence constructor by a pointer. 
* * For programming convenience, the states are stored as integers, but the translation toward * and from a char description is easily performed with the Alphabet classes. * * Sequence objets also contain a name attribute and potentially several comment lines. * * @see Alphabet */ class BasicSequence : public Sequence, public BasicSymbolList { private: /** * @brief The sequence name. */ std::string name_; /** * @brief The sequence comments. */ Comments comments_; public: /** * @brief Empty constructor: build a void Sequence with just an Alphabet * * You can use it safely for all type of Alphabet in order to build an * empty Sequence i.e. without name nor sequence data. * * @param alpha A pointer toward the Alphabet to be used with this Sequence. */ BasicSequence(const Alphabet* alpha); /** * @brief Direct constructor: build a Sequence object from a std::string * You can use it safely for RNA, DNA and protein sequences. * * It can be used with codon sequences too, the std::string will be cut into * parts of size 3. But for more complicated alphabets, you should use one * complete constructors. * * @param name The sequence name. * @param sequence The whole sequence to be parsed as a std::string. * @param alpha A pointer toward the alphabet to be used with this sequence. */ BasicSequence(const std::string& name, const std::string& sequence, const Alphabet* alpha) throw (BadCharException); /** * @brief Direct constructor: build a Sequence object from a std::string. * * You can use it safely for RNA, DNA and protein sequences. * * It can be used with codon sequences too, the std::string will be cut into * tokens of size 3. But for more complicated alphabets, you should use one * complete constructors. * * @param name The sequence name. * @param sequence The whole sequence to be parsed as a std::string. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. 
*/ BasicSequence(const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * You should note that the sequence is stored as a std::vector of int. * Hence each std::string in the std::vector will be translated using the alphabet object. * * @param name The sequence name. * @param sequence The sequence content. * @param alpha A pointer toward the alphabet to be used with this sequence. */ BasicSequence(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * You should note that the sequence is stored as a std::vector of int. * Hence each std::string in the std::vector will be translated using the alphabet object. * * @param name The sequence name. * @param sequence The sequence content. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. */ BasicSequence(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * @param name The sequence name. * @param sequence The sequence content. * @param alpha A pointer toward the alphabet to be used with this sequence. */ BasicSequence(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadIntException); /** * @brief General purpose constructor, can be used with any alphabet. * * @param name The sequence name. * @param sequence The sequence content. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. 
*/ BasicSequence(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadIntException); /** * @brief The Sequence generic copy constructor. This does not perform a hard copy of the alphabet object. */ BasicSequence(const Sequence& s); /** * @brief The Sequence copy constructor. This does not perform a hard copy of the alphabet object. */ BasicSequence(const BasicSequence& s); /** * @brief The Sequence generic assignment operator. This does not perform a hard copy of the alphabet object. * * @return A ref toward the assigned Sequence. */ BasicSequence& operator=(const Sequence& s); /** * @brief The Sequence assignment operator. This does not perform a hard copy of the alphabet object. * * @return A ref toward the assigned Sequence. */ BasicSequence& operator=(const BasicSequence& s); virtual ~BasicSequence() {} public: /** * @name The Clonable interface * * @{ */ BasicSequence* clone() const { return new BasicSequence(*this); } /** @} */ /** * @name Setting/getting the name of the sequence. * * @{ */ /** * @brief Get the name of this sequence. * * @return This sequence name. */ const std::string& getName() const { return name_; } /** * @brief Set the name of this sequence. * * @param name The new name of the sequence. */ void setName(const std::string& name) { name_ = name; } /** @} */ /** * @name Setting/getting the comments associated to the sequence. * * @{ */ /** * @brief Get the comments associated to this sequence. * * @return The comments of the sequence. */ const Comments& getComments() const { return comments_; } /** * @brief Set the comments associated to this sequence. * * @param comments The new comments of the sequence. */ void setComments(const Comments& comments) { comments_ = comments; } /** @} */ /** * @name Adjusting the size of the sequence. * * @{ */ /** * @brief Set the whole content of the sequence. * * @param sequence The new content of the sequence. 
* @see The Sequence constructor for information about the way sequences are internaly stored. */ virtual void setContent(const std::string& sequence) throw (BadCharException); void setContent(const std::vector& list) throw (BadIntException) { BasicSymbolList::setContent(list); } void setContent(const std::vector& list) throw (BadCharException) { BasicSymbolList::setContent(list); } /** * @brief Set up the size of a sequence from the right side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeR(size_t newSize); /** * @brief Set up the size of a sequence from the left side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeL(size_t newSize); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadIntException If the content does not match the current alphabet. */ virtual void append(const std::vector& content) throw (BadIntException); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ virtual void append(const std::vector& content) throw (BadCharException); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ virtual void append(const std::string& content) throw (BadCharException); /** @} */ }; } //end of namespace bpp. 
#endif // _SEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/CodonSiteTools.h000644 000000 000000 00000032000 12147656566 020175 0ustar00rootroot000000 000000 // // File CodonSiteTools.h // Author : Sylvain Glémin // Last modification : November 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _CODONSITETOOLS_H_ #define _CODONSITETOOLS_H_ #include "SymbolListTools.h" #include "Site.h" #include "Alphabet/CodonAlphabet.h" #include "GeneticCode/GeneticCode.h" #include // From the STL: #include namespace bpp { /** * @brief Utilitary functions for codon sites. */ class CodonSiteTools: public SymbolListTools { public: CodonSiteTools() {} virtual ~CodonSiteTools() {} public: /** * @brief Method to know if a codon site contains gap(s) or stop codons. * * @param site a Site * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. */ static bool hasGapOrStop(const Site & site) throw (AlphabetException); /** * @brief Method to know if a codon site contains stop codon or not * * @param site a Site * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. */ static bool hasStop(const Site & site) throw (AlphabetException); /** * @brief Method to know if a polymorphic codon site is polymorphic at only one site * * @param site a Site * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw EmptySiteException If the site has size 0. */ static bool isMonoSitePolymorphic(const Site & site) throw (AlphabetException, EmptySiteException); /** * @brief Method to know if polymorphism at a codon site is synonymous * * @param site a Site * @param gc a GeneticCode * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If the site has size 0. */ static bool isSynonymousPolymorphic(const Site & site, const GeneticCode & gc) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @brief generate a codon site without rare variants * * Rare variants are replaced by the most frequent allele. 
* This method is used to exclude rare variants in some analyses as in McDonald-Kreitman Test * (McDonald & Kreitman, 1991, Nature 351 pp652-654). * For an application, see for example (Fay et al. 2001, Genetics 158 pp 1227-1234). * * @param site a Site * @param freqmin a double, allele in frequency stricly lower than freqmin are replaced * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw EmptySiteException If the site has size 0. * */ static Site * generateCodonSiteWithoutRareVariant(const Site & site, double freqmin) throw(AlphabetException, EmptySiteException); /** * @brief Compute the number of differences between two codons * * @param i a int * @param j a int * @param ca a CodonAlphabet */ static size_t numberOfDifferences(int i, int j, const CodonAlphabet & ca); /** * @brief Compute the number of synonymous differences between two codons * * For complex codon: * If minchange = false (default option) the different paths are equally weighted. * If minchange = true the path with the minimum number of non-synonymous change is chosen. * Paths included stop codons are excluded. * @param i a int * @param j a int * @param gc a GeneticCode * @param minchange a boolean set by default to false */ static double numberOfSynonymousDifferences(int i, int j, const GeneticCode & gc, bool minchange=false); /** * @brief Compute the synonymous pi per codon site * * The following formula is used: * @f[ * pi = frac{n}{n-1}\sum_{i,j}x_{i}x_{j}P_{ij} * @f] * where n is the number of sequence, \f$x_i\f$ and \f$x_j\f$ the frequencies of each codon type occuring at the site * \f$P_{i,j}\f$ the number of synonymous difference between these codons. * Be careful: here, pi is not normalized by the number of synonymous sites. * * If minchange = false (default option) the different paths are equally weighted. * If minchange = true the path with the minimum number of non-synonymous change is chosen. 
* * @param site a Site * @param gc a GeneticCode * @param minchange a boolean set by default to false * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If the site has size 0. */ static double piSynonymous(const Site & site, const GeneticCode & gc, bool minchange=false) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @brief Compute the non-synonymous pi per codon site * * The following formula is used: * @f[ * pi = frac{n}{n-1}\sum_{i,j}x_{i}x_{j}P_{ij} * @f] * where n is the number of sequence, \f$x_i\f$ and \f$x_j\f$ the frequencies of each codon type occuring at the site * \f$P_{i,j}\f$ the number of nonsynonymous difference between these codons. * Be careful: here, pi is not normalized by the number of non-synonymous sites. * If minchange = false (default option) the different paths are equally weighted. * If minchange = true the path with the minimum number of non-synonymous change is chosen. * * @param site a Site * @param gc a GeneticCode * @param minchange a boolean set by default to false * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If the site has size 0. */ static double piNonSynonymous(const Site & site, const GeneticCode & gc, bool minchange = false) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @brief Return the number of synonymous positions of a codon * * A site is consider as x% synonymous if x% of the possible mutations are synonymous * Transition/transversion ratio can be taken into account (use the variable ratio) * * Unresolved codons and stop codon will return a value of 0. 
* * @param i a int * @param gc a GeneticCode * @param ratio a double set by default to 1 */ static double numberOfSynonymousPositions(int i, const GeneticCode & gc, double ratio=1.0) throw (Exception); /** * @brief Return the mean number of synonymous positions per codon site * * A site is consider as x% synonymous if x% of the possible mutations are synonymous * Transition/transversion ratio can be taken into account (use the variable ratio) * The mean is computed over the VectorSite. * * Unresolved and stop codons are counted as 0. * * @param site a Site * @param gc a GeneticCode * @param ratio a double Set by default to 1 * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If the site has size 0. */ static double meanNumberOfSynonymousPositions(const Site & site, const GeneticCode & gc, double ratio=1) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @brief Return the number of subsitutions per codon site * * No recombination is assumed, that is in complex codon homoplasy is assumed. * Example: * @code * ATT * ATT * ATT * ATC * ATC * AGT * AGT * AGC * @endcode * Here, 3 substitutions are counted. Assuming that the last codon (AGC) is a recombinant between ATC and AGT * would have lead to counting only 2 subsitutions. * * Rare variants (<= freqmin) can be excluded. * * @param site a Site * @param freqmin a double To exclude snp in frequency strictly lower than freqmin (by default freqmin = 0) * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw EmptySiteException If the site has size 0. */ static size_t numberOfSubsitutions(const Site & site, double freqmin = 0.) throw(AlphabetException, EmptySiteException); /** * @brief Return the number of Non Synonymous subsitutions per codon site. 
* * It is assumed that the path linking amino acids only involved one substitution by step. * * Rare variants (<= freqmin) can be excluded. * In case of complex codon, the path that gives the minimum number of non-synonymous changes * is chosen. The argument minchange=true is sent to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. * * @param site a Site * @param gc a GeneticCode * @param freqmin a double To exclude snp in frequency strictly lower than freqmin (by default freqmin = 0). * @throw AlphabetException If the alphabet associated to the site is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If the site has size 0. */ static size_t numberOfNonSynonymousSubstitutions(const Site & site, const GeneticCode & gc, double freqmin = 0.) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @brief Return a vector with the number of fixed synonymous and non-synonymous differences per codon site * * Compute the number of synonymous and non-synonymous differences between * the concensus codon of SiteIn (i) and SiteOut (j), which are fixed within each alignement. * Example: * @code * SiteIn * ATT * ATT * ATC * @endcode * @code * SiteOut * CTA * CTA * CTA * @endcode * Here, the first position is non-synonymous different and fixed, * the third position is synonymous different but not fixed (polymorphic in SiteIn). * The return vector is thus [0,1]. * In case of complex codon, the path that gives the minimum number of non-synonymous changes * is chosen. The argument minchange=true is sent to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. * * Rare variants (<= freqmin) can be excluded. 
* * @param siteIn a Site * @param siteOut a Site * @param i an integer * @param j an integer * @param gc a GeneticCode * @throw AlphabetException If the alphabet associated to one of the sites is not a codon alphabet. * @throw AlphabetMismatchException If the codon alphabet each the site do not match the codon alphabet of the genetic code. * @throw EmptySiteException If one of the sites has size 0. */ static std::vector fixedDifferences(const Site & siteIn, const Site & siteOut, int i, int j, const GeneticCode & gc) throw (AlphabetException, AlphabetMismatchException, EmptySiteException); /** * @return True if all sequences have a fourfold degenerated codon in the site * (that is, if a mutation in the fourth position does not change the aminoacid). * @author Benoit Nabholz, Annabelle Haudry * @param site The site to analyze. * @param gc The genetic code to use. */ static bool isFourFoldDegenerated(const Site& site, const GeneticCode& gc); }; } //end of namespace bpp. #endif //_CONDONSITETOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/SiteTools.cpp000644 000000 000000 00000032103 12147656566 017551 0ustar00rootroot000000 000000 // // File SiteTools.cpp // Author : Julien Dutheil // Guillaume Deuchst // Created on: Friday August 8 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SiteTools.h" #include "Alphabet/CodonAlphabet.h" #include #include #include using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ bool SiteTools::hasGap(const Site& site) { // Main loop : for all characters in site for (size_t i = 0; i < site.size(); i++) { if (site.getAlphabet()->isGap(site[i])) return true; } return false; } /******************************************************************************/ bool SiteTools::isGapOnly(const Site& site) { // Main loop : for all characters in site for (size_t i = 0; i < site.size(); i++) { if (!site.getAlphabet()->isGap(site[i])) return false; } return true; } /******************************************************************************/ bool SiteTools::isGapOrUnresolvedOnly(const Site& site) { // Main loop : for all characters in site for (size_t i = 0; i < site.size(); i++) { if (!site.getAlphabet()->isGap(site[i]) && !site.getAlphabet()->isUnresolved(site[i])) return false; } return true; } /******************************************************************************/ bool SiteTools::hasUnknown(const Site& site) { // Main loop 
: for all characters in site for (size_t i = 0; i < site.size(); i++) { if (site[i] == site.getAlphabet()->getUnknownCharacterCode()) return true; } return false; } /******************************************************************************/ bool SiteTools::hasStopCodon(const Site& site) { // Main loop : for all characters in site const CodonAlphabet* pca = dynamic_cast(site.getAlphabet()); if (pca == 0) return false; for (size_t i = 0; i < site.size(); i++) { if (pca->isStop(site[i])) return true; } return false; } /******************************************************************************/ bool SiteTools::isComplete(const Site& site) { // Main loop : for all characters in site for (size_t i = 0; i < site.size(); i++) { if (site.getAlphabet()->isGap(site[i]) || site.getAlphabet()->isUnresolved(site[i])) return false; } return true; } /******************************************************************************/ bool SiteTools::areSitesIdentical(const Site& site1, const Site& site2) { // Site's size and content checking if (site1.getAlphabet()->getAlphabetType() != site2.getAlphabet()->getAlphabetType()) return false; if (site1.size() != site2.size()) return false; else { for (size_t i = 0; i < site1.size(); i++) { if (site1[i] != site2[i]) return false; } return true; } } /******************************************************************************/ bool SiteTools::isConstant(const Site& site, bool ignoreUnknown, bool unresolvedRaisesException) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::isConstant: Incorrect specified site, size must be > 0", &site); // For all site's characters int gap = site.getAlphabet()->getGapCharacterCode(); if (ignoreUnknown) { int s = site[0]; int unknown = site.getAlphabet()->getUnknownCharacterCode(); size_t i = 0; while (i < site.size() && (s == gap || s == unknown)) { s = site[i]; i++; } if (s == unknown || s == gap) { if (unresolvedRaisesException) throw 
EmptySiteException("SiteTools::isConstant: Site is only made of gaps or generic characters."); else return false; } while (i < site.size()) { if (site[i] != s && site[i] != gap && site[i] != unknown) return false; i++; } } else { int s = site[0]; size_t i = 0; while (i < site.size() && s == gap) { s = site[i]; i++; } if (s == gap) { if (unresolvedRaisesException) throw EmptySiteException("SiteTools::isConstant: Site is only made of gaps."); else return false; } while (i < site.size()) { if (site[i] != s && site[i] != gap) return false; i++; } } return true; } /******************************************************************************/ double SiteTools::variabilityShannon(const Site& site, bool resolveUnknown) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::variabilityShannon: Incorrect specified site, size must be > 0", &site); map p; getFrequencies(site, p, resolveUnknown); // We need to correct frequencies for gaps: double s = 0.; for (int i = 0; i < static_cast(site.getAlphabet()->getSize()); i++) { double f = p[i]; if (f > 0) s += f * log(f); } return -s; } /******************************************************************************/ double SiteTools::mutualInformation(const Site& site1, const Site& site2, bool resolveUnknown) throw (DimensionException, EmptySiteException) { // Empty site checking if (site1.size() == 0) throw EmptySiteException("SiteTools::mutualInformation: Incorrect specified site, size must be > 0", &site1); if (site2.size() == 0) throw EmptySiteException("SiteTools::mutualInformation: Incorrect specified site, size must be > 0", &site2); if (site1.size() != site2.size()) throw DimensionException("SiteTools::mutualInformation: sites must have the same size!", site1.size(), site2.size()); vector p1(site1.getAlphabet()->getSize()); vector p2(site2.getAlphabet()->getSize()); map > p12; getCounts(site1, site2, p12, resolveUnknown); double mi = 0, tot = 0, pxy; // We need to 
correct frequencies for gaps: for (size_t i = 0; i < site1.getAlphabet()->getSize(); i++) { for (size_t j = 0; j < site2.getAlphabet()->getSize(); j++) { pxy = p12[static_cast(i)][static_cast(j)]; tot += pxy; p1[i] += pxy; p2[j] += pxy; } } for (size_t i = 0; i < site1.getAlphabet()->getSize(); i++) { p1[i] /= tot; } for (size_t j = 0; j < site2.getAlphabet()->getSize(); j++) { p2[j] /= tot; } for (size_t i = 0; i < site1.getAlphabet()->getSize(); i++) { for (size_t j = 0; j < site2.getAlphabet()->getSize(); j++) { pxy = p12[static_cast(i)][static_cast(j)] / tot; if (pxy > 0) mi += pxy * log(pxy / (p1[i] * p2[j])); } } return mi; } /******************************************************************************/ double SiteTools::jointEntropy(const Site& site1, const Site& site2, bool resolveUnknown) throw (DimensionException, EmptySiteException) { // Empty site checking if (site1.size() == 0) throw EmptySiteException("SiteTools::jointEntropy: Incorrect specified site, size must be > 0", &site1); if (site2.size() == 0) throw EmptySiteException("SiteTools::jointEntropy: Incorrect specified site, size must be > 0", &site2); if (site1.size() != site2.size()) throw DimensionException("SiteTools::jointEntropy: sites must have the same size!", site1.size(), site2.size()); map > p12; getCounts(site1, site2, p12, resolveUnknown); double tot = 0, pxy, h = 0; // We need to correct frequencies for gaps: for (size_t i = 0; i < site1.getAlphabet()->getSize(); i++) { for (size_t j = 0; j < site2.getAlphabet()->getSize(); j++) { pxy = p12[static_cast(i)][static_cast(j)]; tot += pxy; } } for (size_t i = 0; i < site1.getAlphabet()->getSize(); i++) { for (size_t j = 0; j < site2.getAlphabet()->getSize(); j++) { pxy = p12[static_cast(i)][static_cast(j)] / tot; if (pxy > 0) h += pxy * log(pxy); } } return -h; } /******************************************************************************/ double SiteTools::variabilityFactorial(const Site& site) throw (EmptySiteException) { // Empty 
site checking if (site.size() == 0) throw EmptySiteException("SiteTools::variabilityFactorial: Incorrect specified site, size must be > 0", &site); map p; getCounts(site, p); vector c = MapTools::getValues(p); size_t s = VectorTools::sum(c); long double l = static_cast(NumTools::fact(s)) / static_cast(VectorTools::sum(VectorTools::fact(c))); return (static_cast(std::log(l))); } /******************************************************************************/ double SiteTools::heterozygosity(const Site& site) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::heterozygosity: Incorrect specified site, size must be > 0", &site); map p; getFrequencies(site, p); vector c = MapTools::getValues(p); double n = VectorTools::norm(MapTools::getValues(p)); return 1. - n * n; } /******************************************************************************/ size_t SiteTools::getNumberOfDistinctCharacters(const Site& site) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::getNumberOfDistinctCharacters(): Incorrect specified site, size must be > 0", &site); // For all site's characters if (SiteTools::isConstant(site)) return 1; map counts; SymbolListTools::getCounts(site, counts); int s = 0; for (map::iterator it = counts.begin(); it != counts.end(); it++) { if (it->second != 0) s++; } return s; } /******************************************************************************/ bool SiteTools::hasSingleton(const Site& site) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::hasSingleton: Incorrect specified site, size must be > 0", &site); // For all site's characters if (SiteTools::isConstant(site)) return false; map counts; getCounts(site, counts); for (map::iterator it = counts.begin(); it != counts.end(); it++) { if (it->second == 1) return true; } return false; } 
/******************************************************************************/ bool SiteTools::isParsimonyInformativeSite(const Site& site) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::isParsimonyInformativeSite: Incorrect specified site, size must be > 0", &site); // For all site's characters if (SiteTools::isConstant(site, false, false)) return false; map counts; SymbolListTools::getCounts(site, counts); size_t npars = 0; for (map::iterator it = counts.begin(); it != counts.end(); it++) { if (it->second > 1) npars++; } if (npars > 1) return true; return false; } /******************************************************************************/ bool SiteTools::isTriplet(const Site& site) throw (EmptySiteException) { // Empty site checking if (site.size() == 0) throw EmptySiteException("SiteTools::isTriplet: Incorrect specified site, size must be > 0", &site); // For all site's characters if (SiteTools::getNumberOfDistinctCharacters(site) >= 3) return true; else return false; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/App/SequenceApplicationTools.cpp000644 000000 000000 00000046451 12147656566 023334 0ustar00rootroot000000 000000 // // File: SequenceApplicationTools.cpp // Created by: Julien Dutheil // Created on: Fri Oct 21 13:13 // from file old ApplicationTools.h created on Sun Dec 14 09:36:26 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "SequenceApplicationTools.h" #include "../Alphabet/BinaryAlphabet.h" #include "../Alphabet/DefaultAlphabet.h" #include "../Alphabet/EchinodermMitochondrialCodonAlphabet.h" #include "../Alphabet/InvertebrateMitochondrialCodonAlphabet.h" #include "../Alphabet/StandardCodonAlphabet.h" #include "../Alphabet/VertebrateMitochondrialCodonAlphabet.h" #include "../Alphabet/YeastMitochondrialCodonAlphabet.h" #include "../Alphabet/AlphabetTools.h" #include "../GeneticCode/EchinodermMitochondrialGeneticCode.h" #include "../GeneticCode/InvertebrateMitochondrialGeneticCode.h" #include "../GeneticCode/StandardGeneticCode.h" #include "../GeneticCode/VertebrateMitochondrialGeneticCode.h" #include "../GeneticCode/YeastMitochondrialGeneticCode.h" #include "../Io/BppOSequenceReaderFormat.h" #include "../Io/BppOAlignmentReaderFormat.h" #include "../Io/BppOSequenceWriterFormat.h" #include "../Io/BppOAlignmentWriterFormat.h" #include "../Io/BppOAlphabetIndex1Format.h" #include "../Io/BppOAlphabetIndex2Format.h" #include "../Io/MaseTools.h" #include "../SiteTools.h" #include "../SequenceTools.h" #include #include #include using namespace bpp; using namespace std; /******************************************************************************/ Alphabet* SequenceApplicationTools::getAlphabet( map& params, const string& suffix, bool suffixIsOptional, bool verbose, bool allowGeneric) throw (Exception) { Alphabet* chars; string alphtt = ApplicationTools::getStringParameter("alphabet", params, "DNA", suffix, suffixIsOptional); string alphabet = ""; map args; int flag = 0; KeyvalTools::parseProcedure(alphtt, alphabet, args); unsigned int lg = 1; if (alphabet == "Word") { if (args.find("length") == args.end()) throw Exception("Missing length parameter for Word alphabet"); lg = TextTools::to(args["length"]); if (args.find("letter") == args.end()) throw Exception("Missing letter alphabet for Word alphabet"); alphabet = args["letter"]; flag = 1; } else if (alphabet == "RNY") { if 
(args.find("letter") == args.end()) throw Exception("Missing letter alphabet for RNY alphabet"); alphabet = args["letter"]; flag = 2; } if (alphabet == "Binary") chars = new BinaryAlphabet(); else if (alphabet == "DNA") { bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, false); chars = new DNA(mark); } else if (alphabet == "RNA") { bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, false); chars = new RNA(mark); } else if (alphabet == "Protein") chars = new ProteicAlphabet(); else if (allowGeneric && alphabet == "Generic") chars = new DefaultAlphabet(); else if (alphabet == "Codon") { if (args.find("letter") == args.end()) throw Exception("Missing 'letter' argument in Codon :" + alphabet); string alphnDesc = ApplicationTools::getStringParameter("letter", args, "RNA"); string alphn; map alphnArgs; KeyvalTools::parseProcedure(alphnDesc, alphn, alphnArgs); NucleicAlphabet* pnalph; if (alphn == "RNA") { bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, false); pnalph = new RNA(mark); } else if (alphn == "DNA") { bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, false); pnalph = new DNA(mark); } else throw Exception("Alphabet not known in Codon : " + alphn); string type = ApplicationTools::getStringParameter("type", args, "Standard"); if (type == "EchinodermMitochondrial") chars = new EchinodermMitochondrialCodonAlphabet(pnalph); else if (type == "InvertebrateMitochondrial") chars = new InvertebrateMitochondrialCodonAlphabet(pnalph); else if (type == "Standard") chars = new StandardCodonAlphabet(pnalph); else if (type == "VertebrateMitochondrial") chars = new VertebrateMitochondrialCodonAlphabet(pnalph); else if (type == "YeastMitochondrial") chars = new YeastMitochondrialCodonAlphabet(pnalph); else throw Exception("Unknown Alphabet : " + alphabet); alphabet = alphabet + "(" + alphn + ")"; } else throw 
Exception("Alphabet not known: " + alphabet); if (flag == 1) { chars = new WordAlphabet(chars, lg); string al = " "; for (unsigned i = 0; i < lg; i++) { al += alphabet + " "; } alphabet = "Word(" + al + ")"; } else if (flag == 2) { if (AlphabetTools::isNucleicAlphabet(chars)) { chars = new RNY(*(dynamic_cast(chars))); alphabet = "RNY(" + alphabet + ")"; } else throw Exception("RNY needs a Nucleic Alphabet, instead of " + alphabet); } if (verbose) ApplicationTools::displayResult("Alphabet type ", alphabet); return chars; } /******************************************************************************/ GeneticCode* SequenceApplicationTools::getGeneticCode( const NucleicAlphabet* alphabet, const string& description) throw (Exception) { GeneticCode* geneCode; if (description.find("EchinodermMitochondrial") != string::npos) geneCode = new EchinodermMitochondrialGeneticCode(alphabet); else if (description.find("InvertebrateMitochondrial") != string::npos) geneCode = new InvertebrateMitochondrialGeneticCode(alphabet); else if (description.find("Standard") != string::npos) geneCode = new StandardGeneticCode(alphabet); else if (description.find("VertebrateMitochondrial") != string::npos) geneCode = new VertebrateMitochondrialGeneticCode(alphabet); else if (description.find("YeastMitochondrial") != string::npos) geneCode = new YeastMitochondrialGeneticCode(alphabet); else throw Exception("Unknown GeneticCode: " + description); return geneCode; } /******************************************************************************/ AlphabetIndex1* SequenceApplicationTools::getAlphabetIndex1(const Alphabet* alphabet, const string& description, const string& message, bool verbose) throw (Exception) { BppOAlphabetIndex1Format reader(alphabet, message, verbose); return reader.read(description); } AlphabetIndex2* SequenceApplicationTools::getAlphabetIndex2(const Alphabet* alphabet, const string& description, const string& message, bool verbose) throw (Exception) { 
BppOAlphabetIndex2Format reader(alphabet, message, verbose); return reader.read(description); } /******************************************************************************/ SequenceContainer* SequenceApplicationTools::getSequenceContainer( const Alphabet* alpha, map& params, const string& suffix, bool suffixIsOptional, bool verbose) { string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional); string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional); BppOSequenceReaderFormat bppoReader(verbose); auto_ptr iSeq(bppoReader.read(sequenceFormat)); if (verbose) { ApplicationTools::displayResult("Sequence file " + suffix, sequenceFilePath); ApplicationTools::displayResult("Sequence format " + suffix, iSeq->getFormatName()); } SequenceContainer* sequences = iSeq->readSequences(sequenceFilePath, alpha); return sequences; } /******************************************************************************/ VectorSiteContainer* SequenceApplicationTools::getSiteContainer( const Alphabet* alpha, map& params, const string& suffix, bool suffixIsOptional, bool verbose) { string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional); string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional); BppOAlignmentReaderFormat bppoReader(verbose); auto_ptr iAln(bppoReader.read(sequenceFormat)); map args(bppoReader.getUnparsedArguments()); if (verbose) { ApplicationTools::displayResult("Sequence file " + suffix, sequenceFilePath); ApplicationTools::displayResult("Sequence format " + suffix, iAln->getFormatName()); } const Alphabet* alpha2; if (AlphabetTools::isRNYAlphabet(alpha)) alpha2 = &dynamic_cast(alpha)->getLetterAlphabet(); else alpha2 = alpha; const SequenceContainer* seqCont = 
iAln->readAlignment(sequenceFilePath, alpha2); VectorSiteContainer* sites2 = new VectorSiteContainer(*dynamic_cast(seqCont)); delete seqCont; VectorSiteContainer* sites; if (AlphabetTools::isRNYAlphabet(alpha)) { const SequenceTools ST; sites = new VectorSiteContainer(alpha); for (unsigned int i = 0; i < sites2->getNumberOfSequences(); i++) { sites->addSequence(*(ST.RNYslice(sites2->getSequence(i)))); } delete sites2; } else sites = sites2; // Look for site selection: if (iAln->getFormatName() == "MASE file") { // getting site set: string siteSet = ApplicationTools::getStringParameter("siteSelection", args, "none", suffix, suffixIsOptional, false); if (siteSet != "none") { VectorSiteContainer* selectedSites; try { selectedSites = dynamic_cast(MaseTools::getSelectedSites(*sites, siteSet)); if (verbose) ApplicationTools::displayResult("Set found", TextTools::toString(siteSet) + " sites."); } catch (IOException& ioe) { throw ioe; } if (selectedSites->getNumberOfSites() == 0) { throw Exception("Site set '" + siteSet + "' is empty."); } delete sites; sites = selectedSites; } } return sites; } /******************************************************************************/ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse( const SiteContainer& allSites, map& params, string suffix, bool suffixIsOptional, bool gapAsUnknown, bool verbose) { // Fully resolved sites, i.e. 
without jokers and gaps: SiteContainer* sitesToAnalyse; VectorSiteContainer* sitesToAnalyse2; string option = ApplicationTools::getStringParameter("input.sequence.sites_to_use", params, "complete", suffix, suffixIsOptional); if (verbose) ApplicationTools::displayResult("Sites to use", option); if (option == "all") { sitesToAnalyse = new VectorSiteContainer(allSites); string maxGapOption = ApplicationTools::getStringParameter("input.sequence.max_gap_allowed", params, "100%", suffix, suffixIsOptional); if (maxGapOption[maxGapOption.size() - 1] == '%') { double gapFreq = TextTools::toDouble(maxGapOption.substr(0, maxGapOption.size() - 1)) / 100.; if (gapFreq < 1) { if (verbose) ApplicationTools::displayTask("Remove sites with gaps", true); for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; --i) { if (verbose) ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '='); map freq; SiteTools::getFrequencies(sitesToAnalyse->getSite(i - 1), freq); if (freq[-1] > gapFreq) sitesToAnalyse->deleteSite(i - 1); } if (verbose) ApplicationTools::displayTaskDone(); } } else { size_t gapNum = TextTools::to(maxGapOption); if (gapNum < sitesToAnalyse->getNumberOfSequences()) { if (verbose) ApplicationTools::displayTask("Remove sites with gaps", true); for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; i--) { if (verbose) ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '='); map counts; SiteTools::getCounts(sitesToAnalyse->getSite(i - 1), counts); if (counts[-1] > gapNum) sitesToAnalyse->deleteSite(i - 1); } if (verbose) ApplicationTools::displayTaskDone(); } } string maxUnresolvedOption = ApplicationTools::getStringParameter("input.sequence.max_unresolved_allowed", params, "100%", suffix, suffixIsOptional); size_t sAlph = sitesToAnalyse->getAlphabet()->getSize(); if (maxUnresolvedOption[maxUnresolvedOption.size() - 1] == '%') { double unresolvedFreq = 
TextTools::toDouble(maxUnresolvedOption.substr(0, maxUnresolvedOption.size() - 1)) / 100.; if (unresolvedFreq < 1) { if (verbose) ApplicationTools::displayTask("Remove unresolved sites", true); for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; --i) { if (verbose) ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '='); map freq; SiteTools::getFrequencies(sitesToAnalyse->getSite(i - 1), freq); double x = 0; for (unsigned int l = 0; l < sAlph; l++) { x += freq[l]; } if (1 - x > unresolvedFreq) sitesToAnalyse->deleteSite(i - 1); } if (verbose) ApplicationTools::displayTaskDone(); } } else { size_t nbSeq = sitesToAnalyse->getNumberOfSequences(); size_t unresolvedNum = TextTools::to(maxUnresolvedOption); if (unresolvedNum < nbSeq) { if (verbose) ApplicationTools::displayTask("Remove sites with gaps", true); for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; i--) { if (verbose) ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '='); map counts; SiteTools::getCounts(sitesToAnalyse->getSite(i - 1), counts); size_t x = 0; for (int l = 0; l < static_cast(sAlph); l++) { x += counts[l]; } if (nbSeq - x > unresolvedNum) sitesToAnalyse->deleteSite(i - 1); } if (verbose) ApplicationTools::displayTaskDone(); } } if (gapAsUnknown) { SiteContainerTools::changeGapsToUnknownCharacters(*sitesToAnalyse); } } else if (option == "complete") { sitesToAnalyse = SiteContainerTools::getCompleteSites(allSites); size_t nbSites = sitesToAnalyse->getNumberOfSites(); if (verbose) ApplicationTools::displayResult("Complete sites", TextTools::toString(nbSites)); } else if (option == "nogap") { sitesToAnalyse = SiteContainerTools::getSitesWithoutGaps(allSites); size_t nbSites = sitesToAnalyse->getNumberOfSites(); if (verbose) ApplicationTools::displayResult("Sites without gap", TextTools::toString(nbSites)); } else { throw Exception("Option '" + option + "' unknown 
in parameter 'sequence.sites_to_use'."); } if (AlphabetTools::isCodonAlphabet(sitesToAnalyse->getAlphabet())) { option = ApplicationTools::getStringParameter("input.sequence.remove_stop_codons", params, "no", suffix, true); if ((option != "") && verbose) ApplicationTools::displayResult("Remove Stop Codons", option); if (option == "yes") { sitesToAnalyse2 = dynamic_cast(SiteContainerTools::removeStopCodonSites(*sitesToAnalyse)); delete sitesToAnalyse; } else sitesToAnalyse2 = dynamic_cast(sitesToAnalyse); } else sitesToAnalyse2 = dynamic_cast(sitesToAnalyse); return sitesToAnalyse2; } /******************************************************************************/ void SequenceApplicationTools::writeSequenceFile( const SequenceContainer& sequences, map& params, const string& suffix, bool verbose) { string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false); string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, true); BppOSequenceWriterFormat bppoWriter(verbose); auto_ptr oSeq(bppoWriter.read(sequenceFormat)); if (verbose) { ApplicationTools::displayResult("Output sequence file " + suffix, sequenceFilePath); ApplicationTools::displayResult("Output sequence format " + suffix, oSeq->getFormatName()); } // Write sequences: oSeq->writeSequences(sequenceFilePath, sequences, true); } /******************************************************************************/ void SequenceApplicationTools::writeAlignmentFile( const SiteContainer& sequences, map& params, const string& suffix, bool verbose) { string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false); string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, true); BppOAlignmentWriterFormat bppoWriter(verbose); auto_ptr oAln(bppoWriter.read(sequenceFormat)); if (verbose) { 
ApplicationTools::displayResult("Output alignment file " + suffix, sequenceFilePath); ApplicationTools::displayResult("Output alignment format " + suffix, oAln->getFormatName()); } // Write sequences: oAln->writeAlignment(sequenceFilePath, sequences, true); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/App/SequenceApplicationTools.h000644 000000 000000 00000025333 12147656566 022775 0ustar00rootroot000000 000000 // // File: SequenceApplicationTools.h // Created by: Julien Dutheil // Created on: Fri Oct 21 13:13 // from file old ApplicationTools.h created on Sun Dec 14 09:36:26 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEAPPLICATIONTOOLS_H_ #define _SEQUENCEAPPLICATIONTOOLS_H_ #include "../Alphabet/Alphabet.h" #include "../GeneticCode/GeneticCode.h" #include "../AlphabetIndex/AlphabetIndex1.h" #include "../AlphabetIndex/AlphabetIndex2.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSiteContainer.h" #include #include namespace bpp { /** * @brief This class provides some common tools for applications. * * The functions parse some option file, create corresponding objects and send * a pointer toward it. * * The option files are supposed to follow this simple format: * @code * parameterName = parameterContent * @endcode * with one parameter per line. * * @see ApplicationTools */ class SequenceApplicationTools { public: SequenceApplicationTools() {} virtual ~SequenceApplicationTools() {} public: /** * @brief Build an Alphabet object according to options. * * Options used are: * - alphabet = [DNA|RNA|Protein], the alphabet type to use. * = [DNA|RNA|Protein](length=n) a word-alphabet of * words with length n * = [EchinodermMitochondrialCodonAlphabet * | InvertebrateMitochondrialCodonAlphabet * | InvertebrateMitochondrialCodonAlphabet * | StandardCodonAlphabet * | VertebrateMitochondrialCodonAlphabet]([alphn=NA|RNA]) * a codon-alphabet * * @param params The attribute map where options may be found. * @param suffix A suffix to be applied to each attribute name. * @param suffixIsOptional Tell if the suffix is absolutely required. * @param verbose Print some info to the 'message' output stream. 
* @param allowGeneric Tell if generic alphabets can be used. * @return A new Alphabet object according to options specified. */ static Alphabet* getAlphabet( std::map& params, const std::string& suffix = "", bool suffixIsOptional = true, bool verbose = true, bool allowGeneric = false) throw (Exception); /** * @brief Build a GeneticCode object according to options. * * @param alphabet pointer to the NucleicAlphabet * @param description for the name of the GeneticCode: * [EchinodermMitochondrialGeneticCode * | InvertebrateMitochondrialGeneticCode * | InvertebrateMitochondrialGeneticCode * | StandardGeneticCode * | VertebrateMitochondrialGeneticCode] * @return A new GeneticCode object * @throw Exception in case of bad description. */ static GeneticCode* getGeneticCode(const NucleicAlphabet* alphabet, const std::string& description) throw (Exception); /** * @brief Build a AlphabetIndex1 object for a given alphabet. * * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed. * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax. * @param message To be displayed when parsing. * @param verbose Tell if some info should be displayed while parsing. * @return A new AlphabetIndex1 object. * @throw Exception in case of bad description. */ static AlphabetIndex1* getAlphabetIndex1(const Alphabet* alphabet, const std::string& description, const std::string& message = "Alphabet distance:", bool verbose = true) throw (Exception); /** * @brief Build a AlphabetIndex2 object for a given alphabet. * * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed. * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax. * @param message To be displayed when parsing. * @return A new AlphabetIndex2 object. 
* @param verbose Tell if some info should be displayed while parsing. * @throw Exception in case of bad description. */ static AlphabetIndex2* getAlphabetIndex2(const Alphabet* alphabet, const std::string& description, const std::string& message = "Alphabet distance:", bool verbose = true) throw (Exception); /** * @brief Build a SequenceContainer object according to options. * * The sequences do not have to be aligned. * The supported sequence formats are Fasta, DCSE, Clustal, Mase, Phylip and GenBank. * * See the Bio++ program suite manual for a full description of the syntax. * * @param alpha The alphabet to use in the container. * @param params The attribute map where options may be found. * @param suffix A suffix to be applied to each attribute name. * @param suffixIsOptional Tell if the suffix is absolutely required. * @param verbose Print some info to the 'message' output stream. * @return A new VectorSequenceContainer object according to options specified. * @see getSiteContainer to read an alignment. */ static SequenceContainer* getSequenceContainer( const Alphabet* alpha, std::map& params, const std::string& suffix = "", bool suffixIsOptional = true, bool verbose = true); /** * @brief Build a SiteContainer object according to options. * * Sequences in file must be aligned. * The supported sequence formats are Fasta, DCSE, Clustal, Mase and Phylip. * * See the Bio++ program suite manual for a full description of the syntax. * * @param alpha The alphabet to use in the container. * @param params The attribute map where options may be found. * @param suffix A suffix to be applied to each attribute name. * @param suffixIsOptional Tell if the suffix is absolutely required. * @param verbose Print some info to the 'message' output stream. * @return A new VectorSiteContainer object according to options specified. 
*/ static VectorSiteContainer* getSiteContainer( const Alphabet* alpha, std::map& params, const std::string& suffix = "", bool suffixIsOptional = true, bool verbose = true); /** * @brief Retrieves sites suitable for the analysis. * * Options used are: * - sequence.sites_to_use = [all|complete|nogap]. * * If the 'complete' option is used, only fully resolve site will be taken * into account. * If the 'nogap' option is used, only sites without gap will be taken into * account. * If 'gapAsUnknown' is set to true and the all option is selected, gaps will * be changed to 'unknown' character is sequences. * * - sequence.max_gap_allowed = [57%|30] * If a % sign fallow the number, it is taken to be a frequence (in percent). * This specify the maximum amount of gaps allowed for each site. * Sites not satisfying this amount will be removed. * A value of 100% will remove all gap-only sites, a value >100% will keep all sites. * * @param allSites The site container from which sites must be retrieved. * @param params The attribute map where options may be found. * @param suffix A suffix to be applied to each attribute name. * @param suffixIsOptional Tell if the suffix is absolutely required. * @param gapAsUnknown Convert gaps to unknown characters. * @param verbose Print some info to the 'message' output stream. * @return A new VectorSiteContainer object containing sites of interest. */ static VectorSiteContainer* getSitesToAnalyse( const SiteContainer& allSites, std::map& params, std::string suffix = "", bool suffixIsOptional = true, bool gapAsUnknown = true, bool verbose = true); /** * @brief Write a sequence file according to options. * * The supported sequence formats are Fasta and Mase. * * See the Bio++ program suite manual for a full description of the syntax. * * @see writeSequenceFile(SiteContainer) for writing alignments, with more output formats. * * @param sequences The sequences to write. * @param params The attribute map where options may be found. 
* @param suffix A suffix to be applied to each attribute name. * @param verbose Print some info to the 'message' output stream. */ static void writeSequenceFile( const SequenceContainer& sequences, std::map& params, const std::string& suffix = "", bool verbose = true); /** * @brief Write a sequence alignment file according to options. * * The supported sequence formats are Fasta, Mase and Phylip. * * See the Bio++ program suite manual for a full description of the syntax. * * @param sequences The aligned sequences to write. * @param params The attribute map where options may be found. * @param suffix A suffix to be applied to each attribute name. * @param verbose Print some info to the 'message' output stream. */ static void writeAlignmentFile( const SiteContainer& sequences, std::map& params, const std::string& suffix = "", bool verbose = true); }; } // end of namespace bpp. #endif // _SEQUENCEAPPLICATIONTOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithQuality.h000644 000000 000000 00000064263 12147656566 021262 0ustar00rootroot000000 000000 // // File: SequenceWithQuality.h // Authors: Sylvain Gaillard // Vincent Cahais // Julien Dutheil // Created: 19/01/2010 16:01:20 // /* Copyright or © or Copr. Bio++ Development Team, (January 19, 2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEQUALITY_H_ #define _SEQUENCEQUALITY_H_ #include "SequenceWithAnnotation.h" #include #include // From the STL #include #include namespace bpp {
  /**
   * @brief The SequenceQuality class
   *
   * A sequence annotation holding one quality score per sequence element.
   * The score is a signed int value that can represent the phred or the
   * Solexa quality score for nucleic sequence.
   *
   * @author Sylvain Gaillard, Vincent Cahais, Julien Dutheil
   */
  class SequenceQuality :
    public virtual SequenceAnnotation
  {
  private:
    bool removable_;
    std::vector<int> qualScores_;

  public:
    static const std::string QUALITY_SCORE;
    static const int DEFAULT_QUALITY_VALUE;

  public:
    /**
     * @name Constructors
     * @{
     */

    /**
     * @brief Build a new SequenceQuality object
     *
     * Build a new SequenceQuality object and set the quality scores to
     * the default value DEFAULT_QUALITY_VALUE.
     *
     * @param size The size of the sequence.
     * @param removable Tell if this listener can be removed by the user.
     */
    SequenceQuality(size_t size = 0, bool removable = true) :
      removable_(removable),
      qualScores_(size, DEFAULT_QUALITY_VALUE)
    {}

    /**
     * @brief Build a new SequenceQuality object
     *
     * Build a new SequenceQuality and assign quality scores from
     * a vector of int.
     *
     * No length check is done here: the annotation does not know the
     * sequence it will be attached to. Use isValidWith() for that.
     *
     * @param quality The quality scores
     * @param removable Tell if this listener can be removed by the user.
     */
    SequenceQuality(const std::vector<int>& quality, bool removable = true) :
      removable_(removable),
      qualScores_(quality)
    {}

    /** @} */

    /**
     * @name Destructor
     * @{
     */
    virtual ~SequenceQuality() {}
    /** @} */

    /**
     * @name The Clonable interface
     * @{
     */
#ifdef NO_VIRTUAL_COV
    Clonable*
#else
    SequenceQuality*
#endif
    clone() const { return new SequenceQuality(*this); }
    /** @} */

  public:
    /**
     * @brief Reset the scores so that there is one default score per element of seq.
     *
     * @param seq The sequence whose size is used.
     */
    void init(const Sequence& seq)
    {
      qualScores_.assign(seq.size(), DEFAULT_QUALITY_VALUE);
    }

    /**
     * @return The annotation type, QUALITY_SCORE.
     */
    const std::string& getType() const { return QUALITY_SCORE; }

    /**
     * @brief Check that the number of scores equals the size of a sequence.
     *
     * @param sequence The sequence to compare with.
     * @param throwException If true, a mismatch raises an Exception instead of
     * returning false.
     * @return true if sizes match.
     */
    bool isValidWith(const SequenceWithAnnotation& sequence, bool throwException = true) const
    {
      const bool sameSize = (qualScores_.size() == sequence.size());
      if (throwException && !sameSize)
        throw Exception("SequenceQuality. Quality scores must match the sequence size.");
      return sameSize;
    }

    bool isRemovable() const { return removable_; }

    bool isShared() const { return false; }

    void beforeSequenceChanged(const SymbolListEditionEvent& event) {}
    void afterSequenceChanged(const SymbolListEditionEvent& event);
    void beforeSequenceInserted(const SymbolListInsertionEvent& event) {}
    void afterSequenceInserted(const SymbolListInsertionEvent& event);
    void beforeSequenceDeleted(const SymbolListDeletionEvent& event) {}
    void afterSequenceDeleted(const SymbolListDeletionEvent& event);
    void beforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}
    void afterSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}

    /**
     * @return The number of scores stored.
     */
    size_t getSize() const { return qualScores_.size(); }

    /**
     * @brief Unchecked access to the score at position i.
     */
    const int& operator[](size_t i) const { return qualScores_[i]; }
    int& operator[](size_t i) { return qualScores_[i]; }

    /**
     * @brief Replace all scores.
     *
     * @param scores The new scores, must have the same length as the current ones.
     * @throw DimensionException if the lengths differ.
     */
    void setScores(const std::vector<int>& scores)
    {
      if (scores.size() != qualScores_.size())
        throw DimensionException("SequenceQuality::setScores. Trying to replace score by a vector with different length.", scores.size(), qualScores_.size());
      qualScores_ = scores;
    }

    /**
     * @return All scores as a vector.
     */
    const std::vector<int>& getScores() const { return qualScores_; }

    /**
     * @brief Set the score of one position.
     *
     * @param pos The position to modify.
     * @param score The new score.
     * @throw Exception if pos is not a valid position.
     */
    void setScore(size_t pos, int score)
    {
      if (pos >= qualScores_.size())
        throw Exception("SequenceQuality::setScore. Vector overflow. Scores number: " + TextTools::toString(qualScores_.size()) + ", but trying to insert score at position " + TextTools::toString(pos) + ".");
      qualScores_[pos] = score;
    }

    /**
     * @brief Overwrite a range of scores, starting at a given position.
     *
     * @param pos The first position to overwrite.
     * @param scores The scores to copy.
     * @throw Exception if the range does not fit in the current scores.
     */
    void setScores(size_t pos, const std::vector<int>& scores)
    {
      // Written without 'pos + scores.size()' so that a huge pos cannot wrap around.
      if (pos > qualScores_.size() || scores.size() > qualScores_.size() - pos)
        throw Exception("SequenceQuality::setScores. Vector overflow. Scores number: " + TextTools::toString(qualScores_.size()) + ", but trying to insert " + TextTools::toString(scores.size()) + " scores at position " + TextTools::toString(pos) + ".");
      std::copy(scores.begin(), scores.end(), qualScores_.begin() + pos);
    }

    /**
     * @brief Append the scores of another quality annotation to this one.
     *
     * @param anno The annotation to merge.
     * @return false if anno is not a SequenceQuality (or if appending failed
     * with a standard exception), true otherwise.
     */
    bool merge(const SequenceAnnotation& anno)
    {
      try
      {
        // The reference cast throws std::bad_cast on a type mismatch.
        const SequenceQuality& qual = dynamic_cast<const SequenceQuality&>(anno);
        VectorTools::append(qualScores_, qual.getScores());
        return true;
      }
      catch (const std::exception&)
      {
        return false;
      }
    }

    /**
     * @brief Extract the scores of a sub-range as a new annotation.
     *
     * @param pos The first position of the range.
     * @param len The number of positions in the range.
     * @return A new SequenceQuality object, to be deleted by the caller.
     * @throw Exception if the range does not fit in the current scores.
     */
    SequenceQuality* getPartAnnotation(size_t pos, size_t len) const throw (Exception)
    {
      if (pos > qualScores_.size() || len > qualScores_.size() - pos)
        throw Exception("SequenceQuality::getPartAnnotation. Range out of bounds. Scores number: " + TextTools::toString(qualScores_.size()) + ", but trying to extract " + TextTools::toString(len) + " scores at position " + TextTools::toString(pos) + ".");
      return new SequenceQuality(
        std::vector<int>(qualScores_.begin() + pos, qualScores_.begin() + pos + len),
        removable_);
    }
  };

  /**
   * @brief A SequenceWithAnnotation class with quality scores attached.
   *
   * This class adds some useful functions to handle quality scores.
   *
   * @see SequenceQuality
   * @author Sylvain Gaillard, Vincent Cahais, Julien Dutheil
   */
  class SequenceWithQuality :
    public SequenceWithAnnotation
  {
  private:
    // Shortcut to the quality annotation registered through addAnnotation().
    // NOTE(review): presumably owned by the base class once registered -- confirm.
    SequenceQuality* qualScores_;

  public:
    /**
     * @name Constructors
     * @{
     */

    /**
     * @brief Build a new empty SequenceWithQuality
     *
     * @param alpha A pointer to an Alphabet
     *
     * @throw BadCharException if a state is not allowed by the Alphabet
     */
    SequenceWithQuality(
      const Alphabet* alpha
      ) :
      SequenceWithAnnotation(alpha),
      qualScores_(new SequenceQuality(0, false))
    {
      addAnnotation(qualScores_);
    }

    /**
     * @brief Build a new SequenceWithQuality from a std::string
     *
     * Build a new SequenceWithQuality and set the quality scores to
     * the default value DEFAULT_QUALITY_VALUE.
* * @param name The name of the sequence * @param sequence The string representing the sequence * @param alpha A pointer to an Alphabet * * @throw BadCharException if a state is not alowed by the Alphabet */ SequenceWithQuality( const std::string& name, const std::string& sequence, const Alphabet* alpha ) throw (BadCharException): SequenceWithAnnotation(name, sequence, alpha), qualScores_(new SequenceQuality(sequence.size(), false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::string * * Build a new SequenceWithQuality and set the quality scores to * the default value DEFAULT_QUALITY_VALUE. * * @param name The name of the sequence * @param sequence The string representing the sequence * @param comments Comments to add to the sequence * @param alpha A pointer to an Alphabet * * @throw BadCharException if a state is not alowed by the Alphabet * * @author Vincent Cahais */ SequenceWithQuality( const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha ) throw (BadCharException): SequenceWithAnnotation(name, sequence, comments, alpha), qualScores_(new SequenceQuality(sequence.size(), false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::string * * Build a new SequenceWithQuality and assign quality scores from * a vector of int. 
* * @param name The name of the sequence * @param sequence The string representing the sequence * @param quality The quality scores * @param alpha A pointer to an alphabet * * @throw BadCharException if a state is not alowed by the Alphabet * @throw DimensionException if the number of quality values is not equal * to the number of sequence states */ SequenceWithQuality( const std::string& name, const std::string& sequence, const std::vector& quality, const Alphabet* alpha) throw (BadCharException, DimensionException): SequenceWithAnnotation(name, sequence, alpha), qualScores_(new SequenceQuality(quality, false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::string * * Build a new SequenceWithQuality and assign quality scores from * a vector of int. * * @param name The name of the sequence * @param sequence The string representing the sequence * @param quality The quality scores * @param comments Comments to add to the sequence * @param alpha A pointer to an alphabet * * @throw BadCharException if a state is not alowed by the Alphabet * @throw DimensionException if the number of quality values is not equal * to the number of sequence states * * @author Vincent Cahais */ SequenceWithQuality( const std::string& name, const std::string& sequence, const std::vector& quality, const Comments& comments, const Alphabet* alpha) throw (BadCharException, DimensionException): SequenceWithAnnotation(name, sequence, comments, alpha), qualScores_(new SequenceQuality(quality, false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::vector * * Build a new SequenceWithQuality and set the quality scores to * the default value DEFAULT_QUALITY_VALUE. 
* * @param name The name of the sequence * @param sequence The sequence in int * @param alpha A pointer to an Alphabet * * @throw BadIntException if a state is not alowed by the Alphabet */ SequenceWithQuality( const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadIntException): SequenceWithAnnotation(name, sequence, alpha), qualScores_(new SequenceQuality(sequence.size(), false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::vector * * Build a new SequenceWithQuality and set the quality scores to * the default value DEFAULT_QUALITY_VALUE. * * @param name The name of the sequence * @param sequence The sequence in int * @param comments Comments to add to the sequence * @param alpha A pointer to an Alphabet * * @throw BadIntException if a state is not alowed by the Alphabet * * @author Vincent Cahais */ SequenceWithQuality( const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadIntException): SequenceWithAnnotation(name, sequence, comments, alpha), qualScores_(new SequenceQuality(sequence.size(), false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::vector * * Build a new SequenceWithQuality and assign quality scores from * a vector of int. 
* * @param name The name of the sequence * @param sequence The sequence in int * @param quality The quality scores * @param alpha A pointer to an Alphabet * * @throw BadIntException if a state is not alowed by the Alphabet * @throw DimensionException if the number of quality values is not equal * to the number of sequence states */ SequenceWithQuality( const std::string& name, const std::vector& sequence, const std::vector& quality, const Alphabet* alpha) throw (BadIntException, DimensionException): SequenceWithAnnotation(name, sequence, alpha), qualScores_(new SequenceQuality(quality, false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality from a std::vector * * Build a new SequenceWithQuality and assign quality scores from * a vector of int. * * @param name The name of the sequence * @param sequence The sequence in int * @param quality The quality scores * @param comments Comments to add to the sequence * @param alpha A pointer to an Alphabet * * @throw BadIntException if a state is not alowed by the Alphabet * @throw DimensionException if the number of quality values is not equal * to the number of sequence states * * @author Vincent Cahais */ SequenceWithQuality( const std::string& name, const std::vector& sequence, const std::vector& quality, const Comments& comments, const Alphabet* alpha) throw (BadIntException, DimensionException): SequenceWithAnnotation(name, sequence, comments, alpha), qualScores_(new SequenceQuality(quality, false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality * * Build a new SequenceWithQuality from a Sequence object and set the * quality scores to the default value DEFAULT_QUALITY_VALUE. 
* * @param s The Sequence object */ SequenceWithQuality(const Sequence& s) : SequenceWithAnnotation(s), qualScores_(new SequenceQuality(s.size(), false)) { addAnnotation(qualScores_); } /** * @brief Build a new SequenceWithQuality * * Build a new SequenceWithQuality from a Sequence object and set the * quality scores from a vector of int. * * @param s The Sequence object * @param sc The quality scores * * @throw DimensionException if the number of quality values is not equal * to the number of sequence states */ SequenceWithQuality( const Sequence& s, const std::vector& sc) throw (DimensionException): SequenceWithAnnotation(s), qualScores_(new SequenceQuality(sc, false)) { addAnnotation(qualScores_); } /** @} */ /** * @name Destructor * @{ */ virtual ~SequenceWithQuality() {} /** @} */ SequenceWithQuality(const SequenceWithQuality& sequence) : SequenceWithAnnotation(sequence), qualScores_(0) { qualScores_ = dynamic_cast(&getAnnotation(SequenceQuality::QUALITY_SCORE)); } SequenceWithQuality& operator=(const SequenceWithQuality& sequence) { SequenceWithAnnotation::operator=(sequence); qualScores_ = dynamic_cast(&getAnnotation(SequenceQuality::QUALITY_SCORE)); return *this; } /** * @name The Clonable interface * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else SequenceWithQuality* #endif clone() const { return new SequenceWithQuality(*this); } /** @} */ /** * @name Dealing with quality * @{ */ /** * @brief Set the quality score * * @param pos The position where the quality must be set * @param quality The quality value * * @throw IndexOutOfBoundsException if pos is greater than the * sequence size */ void setQuality(size_t pos, int quality) throw (IndexOutOfBoundsException) { //if (pos >= qualScores_->getSize()) // throw IndexOutOfBoundsException("SequenceWithQuality::setQuality: pos out of bounds", pos, 0, qualScores_->getSize() - 1); //qualScores_[pos] = quality; qualScores_->setScore(pos, quality); } /** * @brief Get the quality score * * @param pos The position where 
the quality is read * * @return The quality score * * @throw IndexOutOfBoundsException if pos is greater than the * sequence size */ int getQuality(size_t pos) const throw (IndexOutOfBoundsException) { if (pos >= qualScores_->getSize()) throw IndexOutOfBoundsException("SequenceWithQuality::getQuality: pos out of bounds", pos, 0, qualScores_->getSize() - 1); return (*qualScores_)[pos]; } /** * @brief Set the whole quality scores * * @param quality The vector of quality scores * * @throw DimensionException if the quality vector does not feet the * sequence size */ void setQualities(const std::vector& quality) throw (DimensionException) { if (quality.size() != qualScores_->getSize()) throw DimensionException("SequenceWithQuality::setQualities: quality must fit sequence size", quality.size(), qualScores_->getSize()); qualScores_->setScores(quality); } /** * @brief Get the whole quality scores * * @return A reference to the quality vector */ const std::vector& getQualities() const { return qualScores_->getScores(); } void append(const std::vector& content) throw (BadIntException) { SequenceWithAnnotation::append(content); } /** * @brief Append content with quality * * @param content A vector of int to append to the sequence * @param qualities A vector of int to append to the qualities * * @throw BadIntException if one of the content int is not in the * Alphabet * @throw DimensionException if qualities does not have the same size as * content */ void append( const std::vector& content, const std::vector& qualities) throw (BadIntException, DimensionException) { if (content.size() != qualities.size()) throw DimensionException("SequenceWithQuality::append: qualities must fit content size", qualities.size(), content.size()); size_t pos = qualScores_->getSize(); append(content); //This automatically extend scores array with default values through the listener //Update scores: qualScores_->setScores(pos, qualities); } void append(const std::vector& content) throw 
(BadCharException) { SequenceWithAnnotation::append(content); } /** * @brief Append content with quality * * @param content A vector of string to append to the sequence * @param qualities A vector of int to append to the qualities * * @throw BadCharException if one of the content string is not in the * Alphabet * @throw DimensionException if qualities does not have the same size as * content */ void append( const std::vector& content, const std::vector& qualities) throw (BadCharException, DimensionException) { if (content.size() != qualities.size()) throw DimensionException("SequenceWithQuality::append: qualities must fit content size", qualities.size(), content.size()); size_t pos = qualScores_->getSize(); SequenceWithAnnotation::append(content); //This automatically extend scores array with default values through the listener //Update scores: qualScores_->setScores(pos, qualities); } void append(const std::string& content) throw (BadCharException) { SequenceWithAnnotation::append(content); } /** * @brief Append content with quality * * @param content A string to append to the sequence * @param qualities A vector of int to append to the qualities * * @throw BadCharException if one of the character of the string is not in * the Alphabet * @throw DimensionException if qualities does not have the same size as * content */ void append( const std::string& content, const std::vector& qualities) throw (BadCharException, DimensionException) { if (content.size() / this->getAlphabet()->getStateCodingSize() != qualities.size()) throw DimensionException("SequenceWithQuality::append: qualities must fit content size", qualities.size(), content.size() / this->getAlphabet()->getStateCodingSize()); size_t pos = qualScores_->getSize(); SequenceWithAnnotation::append(content); //This automatically extend scores array with default values through the listener //Update scores: qualScores_->setScores(pos, qualities); } void addElement( const std::string& c) throw (BadCharException) { 
SequenceWithAnnotation::addElement(c); } /** * @brief Add a character to the end of the list with quality * * @param c The element to add to the sequence * @param q The quality of this element * * @throw BadCharException if one of the character of the string is not in * the Alphabet */ void addElement( const std::string& c, int q) throw (BadCharException) { SequenceWithAnnotation::addElement(c); qualScores_->setScore(size() - 1, q); } void addElement(size_t pos, const std::string& c) throw (BadCharException, IndexOutOfBoundsException) { SequenceWithAnnotation::addElement(pos, c); } /** * @brief Add a character to a certain position in the list with quality * * @param pos The position where the element will be inserted * @param c The element to add to the sequence * @param q The quality of this element * * @throw BadCharException if one of the character of the string is not in * the Alphabet * @throw IndexOutOfBoundsException if pos is greater than the sequence * size */ void addElement( size_t pos, const std::string& c, int q) throw (BadCharException, IndexOutOfBoundsException) { SequenceWithAnnotation::addElement(pos, c); qualScores_->setScore(pos, q); } void addElement(int v) throw (BadIntException) { SequenceWithAnnotation::addElement(v); } /** * @brief Add a character to the end of the list with quality * * @param v The element to add to the sequence * @param q The quality of this element * * @throw BadIntException if the value does not match the current Alphabet */ void addElement(int v, int q) throw (BadIntException) { SequenceWithAnnotation::addElement(v); qualScores_->setScore(size() - 1, q); } void addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) { SequenceWithAnnotation::addElement(pos, v); } /** * @brief Add a character to a certain position in the list with quality * * @param pos The position where the element will be inserted * @param v The element to add to the sequence * @param q The quality of this element * * @throw 
BadIntException if the value does not match the current Alphabet * @throw IndexOutOfBoundsException if pos is greater than the sequence * size */ void addElement(size_t pos, int v, int q) throw (BadCharException, IndexOutOfBoundsException) { SequenceWithAnnotation::addElement(pos, v); qualScores_->setScore(pos, q); } /** @} */ }; } // end of namespace bpp. #endif // _SEQUENCEWITHQUALITY_H_ bpp-seq-2.1.0/src/Bpp/Seq/Sequence.cpp000644 000000 000000 00000015260 12147656566 017401 0ustar00rootroot000000 000000 // // File: Sequence.cpp // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Sequence.h" // class's header file #include "Alphabet/AlphabetTools.h" #include "StringSequenceTools.h" #include using namespace bpp; // From the STL: #include using namespace std; /* Constructors: **************************************************************/ BasicSequence::BasicSequence(const Alphabet* alpha): BasicSymbolList(alpha), name_(), comments_() {} BasicSequence::BasicSequence(const std::string& name, const std::string& sequence, const Alphabet* alpha) throw (BadCharException) : BasicSymbolList(alpha), name_(name), comments_() { if (sequence!="") setContent(sequence); } BasicSequence::BasicSequence(const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException) : BasicSymbolList(alpha), name_(name), comments_(comments) { if (sequence!="") setContent(sequence); } BasicSequence::BasicSequence(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadCharException) : BasicSymbolList(sequence, alpha), name_(name), comments_() {} BasicSequence::BasicSequence(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException) : BasicSymbolList(sequence, alpha), name_(name), comments_(comments) {} BasicSequence::BasicSequence(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadIntException) : BasicSymbolList(sequence, alpha), name_(name), comments_() {} BasicSequence::BasicSequence(const std::string& name, const std::vector& sequence, const Comments& comments, const 
Alphabet* alpha) throw (BadIntException) : BasicSymbolList(sequence, alpha), name_(name), comments_(comments) {} /* Copy constructors: *********************************************************/ BasicSequence::BasicSequence(const Sequence& s) : BasicSymbolList(s), name_(s.getName()), comments_(s.getComments()) {} BasicSequence::BasicSequence(const BasicSequence& s) : BasicSymbolList(s), name_(s.getName()), comments_(s.getComments()) {} /* Assignation operator: ******************************************************/ BasicSequence& BasicSequence::operator=(const Sequence& s) { BasicSymbolList::operator=(s); name_ = s.getName(); comments_ = s.getComments(); return *this; } BasicSequence& BasicSequence::operator=(const BasicSequence& s) { BasicSymbolList::operator=(s); name_ = s.getName(); comments_ = s.getComments(); return *this; } /******************************************************************************/ void BasicSequence::setContent(const std::string& sequence) throw (BadCharException) { // Remove blanks in sequence content_ = StringSequenceTools::codeSequence(TextTools::removeWhiteSpaces(sequence), getAlphabet()); //Warning, an exception may be thrown here! } /******************************************************************************/ void BasicSequence::setToSizeR(size_t newSize) { // Size verification size_t seqSize = content_.size(); if (newSize == seqSize) return; if (newSize < seqSize) { content_.resize(newSize); return; } // Add gaps up to specified size int gap = getAlphabet()->getGapCharacterCode(); while (content_.size() < newSize) content_.push_back(gap); } /******************************************************************************/ void BasicSequence::setToSizeL(size_t newSize) { // Size verification size_t seqSize = content_.size(); if (newSize == seqSize) return; if (newSize < seqSize) { //We must truncate sequence from the left. //This is a very unefficient method! 
content_.erase(content_.begin(), content_.begin() + (seqSize - newSize)); return; } // Add gaps up to specified size int gap = getAlphabet()->getGapCharacterCode(); content_.insert(content_.begin(), newSize - seqSize, gap); } /******************************************************************************/ void BasicSequence::append(const std::vector& content) throw (BadIntException) { // Check list for incorrect characters for (size_t i = 0; i < content.size(); i++) if(!getAlphabet()->isIntInAlphabet(content[i])) throw BadIntException(content[i], "BasicSequence::append", getAlphabet()); //BasicSequence is valid: for (size_t i = 0; i < content.size(); i++) content_.push_back(content[i]); } void BasicSequence::append(const std::vector& content) throw (BadCharException) { // Check list for incorrect characters for (size_t i = 0; i < content.size(); i++) if(!getAlphabet()->isCharInAlphabet(content[i])) throw BadCharException(content[i], "BasicSequence::append", getAlphabet()); //BasicSequence is valid: for (size_t i = 0; i < content.size(); i++) content_.push_back(getAlphabet()->charToInt(content[i])); } void BasicSequence::append(const std::string& content) throw (BadCharException) { append(StringSequenceTools::codeSequence(content, getAlphabet())); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/CodonSiteTools.cpp000644 000000 000000 00000066760 12147656566 020554 0ustar00rootroot000000 000000 // // File CodonSiteTools.cpp // Author : Sylvain Glémin // Last modification : October 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "CodonSiteTools.h" #include "Alphabet/StandardCodonAlphabet.h" #include "Alphabet/CodonAlphabet.h" #include "Alphabet/DNA.h" #include "Alphabet/AlphabetTools.h" #include "SiteTools.h" #include "GeneticCode/GeneticCode.h" #include "GeneticCode/StandardGeneticCode.h" #include #include #include using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ bool CodonSiteTools::hasGapOrStop(const Site& site) throw (AlphabetException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::hasGapOrStop: alphabet is not CodonAlphabet", site.getAlphabet()); for (size_t i = 0; i < site.size(); i++) { if (site[i] < 0) return true; } return false; } /******************************************************************************/ bool CodonSiteTools::hasStop(const Site& site) throw (AlphabetException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::hasStop: alphabet is not CodonAlphabet", site.getAlphabet()); const CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); for (size_t i = 0; i < site.size(); i++) { if (ca->isStop(site[i])) return true; } return false; } /******************************************************************************/ bool CodonSiteTools::isMonoSitePolymorphic(const Site& site) throw (AlphabetException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::isMonoSitePolymorphic: alphabet is not CodonAlphabet", site.getAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::isMonoSitePolymorphic: Incorrect specified site", &site); // Global polymorphism checking if (SiteTools::isConstant(site)) return false; // initialisation of the 3 sub-sites ot the codon vector pos1, pos2, pos3; const 
CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); for (size_t i = 0; i < site.size(); i++) { pos1.push_back(ca->getFirstPosition(site[i])); pos2.push_back(ca->getSecondPosition(site[i])); pos3.push_back(ca->getThirdPosition(site[i])); } const NucleicAlphabet* na = ca->getNucleicAlphabet(); Site s1(pos1, na), s2(pos2, na), s3(pos3, na); // polymorphism checking for each sub-sites size_t nbpol = 0; if (!SiteTools::isConstant(s1)) nbpol++; if (!SiteTools::isConstant(s2)) nbpol++; if (!SiteTools::isConstant(s3)) nbpol++; if (nbpol > 1) return false; return true; } /******************************************************************************/ bool CodonSiteTools::isSynonymousPolymorphic(const Site& site, const GeneticCode& gc) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::isSynonymousPolymorphic: alphabet is not CodonAlphabet", site.getAlphabet()); if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::isSynonymousPolymorphic: Incorrect specified site", &site); // Global polymorphism checking if (SiteTools::isConstant(site)) return false; // Synonymous polymorphism checking vector prot; int first_aa = gc.translate(site[0]); for (size_t i = 1; i < site.size(); i++) { int aa = gc.translate(site[i]); if (aa != first_aa) return false; } return true; } /******************************************************************************/ Site* CodonSiteTools::generateCodonSiteWithoutRareVariant(const Site& site, double freqmin) throw (AlphabetException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw 
AlphabetException("CodonSiteTools::generateCodonSiteWithoutRareVariant: alphabet is not CodonAlphabet", site.getAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::generateCodonSiteWithoutRareVariant: Incorrect specified site", &site); if (SiteTools::isConstant(site)) { Site* noRareVariant = new Site(site); return noRareVariant; } else { // Computation map freqcodon; SiteTools::getFrequencies(site, freqcodon); const CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); const NucleicAlphabet* na = ca->getNucleicAlphabet(); int newcodon = -1; for (map::iterator it = freqcodon.begin(); it != freqcodon.end(); it++) { if (it->second > freqmin && !ca->isStop(it->first)) { newcodon = it->first; break; } } vector pos1, pos2, pos3; for (size_t i = 0; i < site.size(); i++) { pos1.push_back(ca->getFirstPosition(site[i])); pos2.push_back(ca->getSecondPosition(site[i])); pos3.push_back(ca->getThirdPosition(site[i])); } Site s1(pos1, na), s2(pos2, na), s3(pos3, na); map freq1; SiteTools::getFrequencies(s1, freq1); map freq2; SiteTools::getFrequencies(s2, freq2); map freq3; SiteTools::getFrequencies(s3, freq3); vector codon; for (size_t i = 0; i < site.size(); i++) { if (freq1[s1.getValue(i)] > freqmin && freq2[s2.getValue(i)] > freqmin && freq3[s3.getValue(i)] > freqmin) { codon.push_back(site.getValue(i)); } else codon.push_back(newcodon); } Site* noRareVariant = new Site(codon, ca); return noRareVariant; } } /******************************************************************************/ size_t CodonSiteTools::numberOfDifferences(int i, int j, const CodonAlphabet& ca) { size_t nbdif = 0; if (ca.getFirstPosition(i) != ca.getFirstPosition(j)) nbdif++; if (ca.getSecondPosition(i) != ca.getSecondPosition(j)) nbdif++; if (ca.getThirdPosition(i) != ca.getThirdPosition(j)) nbdif++; return nbdif; } /******************************************************************************/ double CodonSiteTools::numberOfSynonymousDifferences(int i, 
int j, const GeneticCode& gc, bool minchange) { const CodonAlphabet* ca = dynamic_cast(gc.getSourceAlphabet()); vector ci = ca->getPositions(i); vector cj = ca->getPositions(j); switch (numberOfDifferences(i, j, *ca)) { case 0: return 0; case 1: { if (gc.areSynonymous(i, j)) return 1; return 0; } case 2: { if (gc.areSynonymous(i, j)) return 2; vector path(2, 0); // Vector of number of synonymous changes per path (2 here) vector weight(2, 1); // Weight to exclude path through stop codon if (ci[0] == cj[0]) { int trans1 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NcNiNi et NcNjNj: NcNjNi, Nc = identical site int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NcNiNi et NcNjNj: NcNiNj, Nc = identical site if (!ca->isStop(trans1)) { if (gc.areSynonymous(i, trans1)) path[0]++; if (gc.areSynonymous(trans1, j)) path[0]++; } else weight[0] = 0; if (!ca->isStop(trans2)) { if (gc.areSynonymous(i, trans2)) path[1]++; if (gc.areSynonymous(trans2, j)) path[1]++; } else weight[1] = 0; } if (ci[1] == cj[1]) { int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNcNi et NjNcNj: NjNcNi, Nc = identical site int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NiNcNi et NjNcNj: NiNcNj, Nc = identical site if (!ca->isStop(trans1)) { if (gc.areSynonymous(i, trans1)) path[0]++; if (gc.areSynonymous(trans1, j)) path[0]++; } else weight[0] = 0; if (!ca->isStop(trans2)) { if (gc.areSynonymous(i, trans2)) path[1]++; if (gc.areSynonymous(trans2, j)) path[1]++; } else weight[1] = 0; } if (ci[2] == cj[2]) { int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NjNiNc, Nc = identical site int trans2 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NiNjNc, Nc = identical site if (!ca->isStop(trans1)) { if (gc.areSynonymous(i, trans1)) path[0]++; if (gc.areSynonymous(trans1, j)) path[0]++; } else weight[0] = 0; if 
(!ca->isStop(trans2)) { if (gc.areSynonymous(i, trans2)) path[1]++; if (gc.areSynonymous(trans2, j)) path[1]++; } else weight[1] = 0; } if (minchange) return VectorTools::max(path); double nbdif = 0; for (size_t k = 0; k < 2; k++) { nbdif += path[k] * weight[k]; } return nbdif / VectorTools::sum(weight); } case 3: { vector path(6, 0); vector weight(6, 1); // First transitory codons int trans100 = ca->getCodon(cj[0], ci[1], ci[2]); int trans010 = ca->getCodon(ci[0], cj[1], ci[2]); int trans001 = ca->getCodon(ci[0], ci[1], cj[2]); // Second transitory codons int trans110 = ca->getCodon(cj[0], cj[1], ci[2]); int trans101 = ca->getCodon(cj[0], ci[1], cj[2]); int trans011 = ca->getCodon(ci[0], cj[1], cj[2]); // Paths if (!ca->isStop(trans100)) { if (gc.areSynonymous(i, trans100)) { path[0]++; path[1]++; } if (!ca->isStop(trans110)) { if (gc.areSynonymous(trans100, trans110)) path[0]++; if (gc.areSynonymous(trans110, j)) path[0]++; } else weight[0] = 0; if (!ca->isStop(trans101)) { if (gc.areSynonymous(trans100, trans101)) path[1]++; if (gc.areSynonymous(trans101, j)) path[1]++; } else weight[1] = 0; } else { weight[0] = 0; weight[1] = 0; } if (!ca->isStop(trans010)) { if (gc.areSynonymous(i, trans010)) { path[2]++; path[3]++; } if (!ca->isStop(trans110)) { if (gc.areSynonymous(trans010, trans110)) path[2]++; if (gc.areSynonymous(trans110, j)) path[2]++; } else weight[2] = 0; if (!ca->isStop(trans011)) { if (gc.areSynonymous(trans010, trans011)) path[3]++; if (gc.areSynonymous(trans011, j)) path[3]++; } else weight[3] = 0; } else { weight[2] = 0; weight[3] = 0; } if (!ca->isStop(trans001)) { if (gc.areSynonymous(i, trans001)) { path[4]++; path[5]++; } if (!ca->isStop(trans101)) { if (gc.areSynonymous(trans001, trans101)) path[4]++; if (gc.areSynonymous(trans101, j)) path[4]++; } else weight[4] = 0; if (!ca->isStop(trans011)) { if (gc.areSynonymous(trans001, trans011)) path[5]++; if (gc.areSynonymous(trans011, j)) path[5]++; } else weight[5] = 0; } else { weight[4] = 0; 
weight[5] = 0; } if (minchange) return VectorTools::max(path); double nbdif = 0; for (size_t k = 0; k < 6; k++) { nbdif += path[k] * weight[k]; } return nbdif / VectorTools::sum(weight); } } // This line is never reached but sends a warning if not there: return 0.; } /******************************************************************************/ double CodonSiteTools::piSynonymous(const Site& site, const GeneticCode& gc, bool minchange) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::piSynonymous: alphabet is not CodonAlphabet", site.getAlphabet()); if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site); // General polymorphism checking if (SiteTools::isConstant(site)) return 0; // Computation map freq; SiteTools::getFrequencies(site, freq); double pi = 0; for (map::iterator it1 = freq.begin(); it1 != freq.end(); it1++) { for (map::iterator it2 = freq.begin(); it2 != freq.end(); it2++) { pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gc, minchange)); } } size_t n = site.size(); return pi * static_cast(n / (n - 1)); } /******************************************************************************/ double CodonSiteTools::piNonSynonymous(const Site& site, const GeneticCode& gc, bool minchange) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::piNonSynonymous: alphabet is not CodonAlphabet", site.getAlphabet()); if 
(typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site); // General polymorphism checking if (SiteTools::isConstant(site)) return 0; if (isSynonymousPolymorphic(site, gc)) return 0; // Computation map freq; SiteTools::getFrequencies(site, freq); const CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); double pi = 0; for (map::iterator it1 = freq.begin(); it1 != freq.end(); it1++) { for (map::iterator it2 = freq.begin(); it2 != freq.end(); it2++) { double nbtot = static_cast(numberOfDifferences(it1->first, it2->first, *ca)); double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gc, minchange); pi += (it1->second) * (it2->second) * (nbtot - nbsyn); } } size_t n = site.size(); return pi * static_cast(n / (n - 1)); } /******************************************************************************/ double CodonSiteTools::numberOfSynonymousPositions(int i, const GeneticCode& gc, double ratio) throw (Exception) { try { const CodonAlphabet* ca = dynamic_cast(gc.getSourceAlphabet()); if (ca->isStop(i)) return 0; if (ca->isUnresolved(i)) return 0; double nbsynpos = 0.0; vector codon = ca->getPositions(i); int acid = gc.translate(i); for (int pos = 0; pos < 3; pos++) { for (int an = 0; an < 4; an++) { if (an == codon[pos]) continue; vector mutcodon = codon; mutcodon[pos] = an; int intcodon = ca->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]); if (ca->isStop(intcodon)) continue; int altacid = gc.translate(intcodon); if (altacid == acid) // if synonymous { if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) || ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2))) // if it is a 
transversion { nbsynpos = nbsynpos + 1 / (ratio + 2); } else // if transition { nbsynpos = nbsynpos + ratio / (ratio + 2); } } } } return nbsynpos; } catch (...) {} // !!!!! en cas d'exception, plante! il faudrait forwarder l'exception // This line is never reached but sends a warning if not there: return 0.; } /******************************************************************************/ double CodonSiteTools::meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gc, double ratio) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::meanNumberOfSynonymousPositions: alphabet is not CodonAlphabet", site.getAlphabet()); if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::meanNumberOfSynonymousPositions: Incorrect specified site", &site); // Computation double NbSyn = 0; map freq; SiteTools::getFrequencies(site, freq); for (map::iterator it = freq.begin(); it != freq.end(); it++) { NbSyn += (it->second) * numberOfSynonymousPositions(it->first, gc, ratio); } return NbSyn; } /******************************************************************************/ size_t CodonSiteTools::numberOfSubsitutions(const Site& site, double freqmin) throw (AlphabetException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::numberOfSubsitutions: alphabet is not CodonAlphabet", site.getAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::numberOfSubsitutions: Incorrect specified site", &site); if 
(SiteTools::isConstant(site)) return 0; Site* newsite; if (freqmin > 1. / static_cast(site.size())) newsite = CodonSiteTools::generateCodonSiteWithoutRareVariant(site, freqmin); else newsite = new Site(site); // Computation if (SiteTools::hasGap(*newsite)) return 0; vector pos1, pos2, pos3; const CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); for (size_t i = 0; i < newsite->size(); i++) { pos1.push_back(ca->getFirstPosition(newsite->getValue(i))); pos2.push_back(ca->getSecondPosition(newsite->getValue(i))); pos3.push_back(ca->getThirdPosition(newsite->getValue(i))); } const NucleicAlphabet* na = ca->getNucleicAlphabet(); Site s1(pos1, na), s2(pos2, na), s3(pos3, na); size_t Scodon = SiteTools::getNumberOfDistinctCharacters(*newsite) - 1; size_t Sbase = SiteTools::getNumberOfDistinctCharacters(s1) + SiteTools::getNumberOfDistinctCharacters(s2) + SiteTools::getNumberOfDistinctCharacters(s3) - 3; delete newsite; if (Scodon >= Sbase) return Scodon; else return Sbase; } /******************************************************************************/ size_t CodonSiteTools::numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gc, double freqmin) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(site.getAlphabet())) throw AlphabetException("CodonSiteTools::numberOfNonSynonymousSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet()); if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (site.size() == 0) throw EmptySiteException("CodonSiteTools::numberOfNonSynonymousSubstitutions: Incorrect specified site", &site); if (SiteTools::isConstant(site)) return 0; Site* newsite; if (freqmin > 1. 
/ static_cast(site.size())) newsite = generateCodonSiteWithoutRareVariant(site, freqmin); else newsite = new Site(site); if (SiteTools::hasGap(*newsite)) return 0; // computation map count; SiteTools::getCounts(*newsite, count); size_t NaSup = 0; size_t Nminmin = 10; const CodonAlphabet* ca = dynamic_cast(site.getAlphabet()); for (map::iterator it1 = count.begin(); it1 != count.end(); it1++) { size_t Nmin = 10; for (map::iterator it2 = count.begin(); it2 != count.end(); it2++) { size_t Ntot = numberOfDifferences(it1->first, it2->first, *ca); size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gc, true); if (Nmin > Ntot - Ns && it1->first != it2->first) Nmin = Ntot - Ns; } NaSup += Nmin; if (Nmin < Nminmin) Nminmin = Nmin; } delete newsite; return NaSup - Nminmin; } /******************************************************************************/ vector CodonSiteTools::fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gc) throw (AlphabetException, AlphabetMismatchException, EmptySiteException) { // Alphabet checking if (!AlphabetTools::isCodonAlphabet(siteIn.getAlphabet())) throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteIn)", siteIn.getAlphabet()); if (!AlphabetTools::isCodonAlphabet(siteOut.getAlphabet())) throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteOut)", siteOut.getAlphabet()); if (typeid(siteIn.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gc.getSourceAlphabet()); if (typeid(siteOut.getAlphabet()) != typeid(gc.getSourceAlphabet())) throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gc.getSourceAlphabet()); // Empty site checking if (siteIn.size() == 0) throw 
EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteIn); if (siteOut.size() == 0) throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteOut); const CodonAlphabet* ca = dynamic_cast(gc.getSourceAlphabet()); size_t Ntot = numberOfDifferences(i, j, *ca); size_t Ns = (size_t) numberOfSynonymousDifferences(i, j, gc, true); size_t Na = Ntot - Ns; size_t Nfix = Ntot; vector pos1in, pos2in, pos3in, pos1out, pos2out, pos3out; for (size_t k = 0; k < siteIn.size(); k++) { pos1in.push_back(ca->getFirstPosition(siteIn[k])); pos2in.push_back(ca->getSecondPosition(siteIn[k])); pos3in.push_back(ca->getThirdPosition(siteIn[k])); pos1out.push_back(ca->getFirstPosition(siteOut[k])); pos2out.push_back(ca->getSecondPosition(siteOut[k])); pos3out.push_back(ca->getThirdPosition(siteOut[k])); } const NucleicAlphabet* na = ca->getNucleicAlphabet(); Site s1in(pos1in, na), s2in(pos2in, na), s3in(pos3in, na); Site s1out(pos1out, na), s2out(pos2out, na), s3out(pos3out, na); bool test1 = false; bool test2 = false; bool test3 = false; if ( (!SiteTools::isConstant(s1in) || !SiteTools::isConstant(s1out)) && ca->getFirstPosition(i) != ca->getFirstPosition(j) ) { test1 = true; Nfix--; } if ( (!SiteTools::isConstant(s2in) || !SiteTools::isConstant(s2out)) && ca->getSecondPosition(i) != ca->getSecondPosition(j) ) { test2 = true; Nfix--; } if ( (!SiteTools::isConstant(s3in) || !SiteTools::isConstant(s3out)) && ca->getThirdPosition(i) != ca->getThirdPosition(j) ) { test3 = true; Nfix--; } // Suppression of differences when not fixed vector v(2); if (Nfix == 0) { v[0] = 0; v[1] = 0; return v; } if (Nfix < Ntot) { if (Na == 0) Ns = Nfix; if (Ns == 0) Na = Nfix; else { if (Ntot == 3) { if (Nfix == 1) { if (test1 && test2) { Na = 0; Ns = 1; } if (test1 && test3) { Na = 1; Ns = 0; } if (test2 && test3) { Na--; Ns--; } } } if (Nfix == 2) { if (test1) { Na = 1; Ns = 1; } if (test2) Na--; if (test3) Ns--; } } if (Ntot == 2) { if (test1) 
{ if (ca->getSecondPosition(i) == ca->getSecondPosition(j)) Na--; else Ns--; } if (test2) Na--; if (test3) Ns--; } } v[0] = Ns; v[1] = Na; return v; } /******************************************************************************/ bool CodonSiteTools::isFourFoldDegenerated(const Site& site, const GeneticCode& gc) { if (!SiteTools::isConstant(site, true)) { /** If non-synonymous mutation **/ if (!(CodonSiteTools::isSynonymousPolymorphic(site, gc))) return false; for (size_t i = 0; i < site.size(); i++) { if (!(gc.isFourFoldDegenerated(site.getValue(i)))) { return false; } } } else { for (size_t i = 0; i < site.size(); i++) { if (!(gc.isFourFoldDegenerated(site.getValue(i)))) { return false; } } } return true; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Transliterator.h000644 000000 000000 00000015417 12147656566 020317 0ustar00rootroot000000 000000 // // File: Transliterator.h // Created by: Julien Dutheil // Created on: Sun Oct 12 14:25:25 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _TRANSLITERATOR_H_ #define _TRANSLITERATOR_H_ #include "Alphabet/Alphabet.h" #include "Sequence.h" namespace bpp { /** * @brief This interface is used when translating a sequence from an alphabet to another: it gives the translation rules, eg: RNA -> DNA. */ class Transliterator { public: Transliterator() {} virtual ~Transliterator() {} public: /** * @brief Get the source alphabet. * * @return The source alphabet. */ virtual const Alphabet * getSourceAlphabet() const = 0; /** * @brief Get the target alphabet. * * @return The target alphabet. */ virtual const Alphabet * getTargetAlphabet() const = 0; /** * @brief Translate a given state coded as a int from source alphabet to target alphabet. * * @param state A state in source alphabet. * @return The corresponding state in target alphabet. * @throw BadIntException If the state is not a proper state for source alphabet. * @throw Exception Other kind of error, depending on the implementation. */ virtual int translate(int state) const throw (BadIntException, Exception) = 0; /** * @brief Translate a given state coded as a string from source alphabet to target alphabet. * * @param state A state in source alphabet. 
* @return The corresponding state in target alphabet. * @throw BadCharException If the state is not a proper state for source alphabet. * @throw Exception Other kind of error, depending on the implementation. */ virtual std::string translate(const std::string & state) const throw (BadCharException, Exception) = 0; /** * @brief Translate a whole sequence from source alphabet to target alphabet. * * @param sequence A sequence in source alphabet. * @return The corresponding sequence in target alphabet. * @throw AlphabetMismatchException If the sequence alphabet do not match the source alphabet. * @throw Exception Other kind of error, depending on the implementation. */ virtual Sequence * translate(const Sequence & sequence) const throw (AlphabetMismatchException, Exception) = 0; }; /** * @brief The same as previous, but can perform the reverse translation, eg: RNA -> DNA and DNA -> RNA; */ class ReverseTransliterator: public virtual Transliterator { public: ReverseTransliterator() {} virtual ~ReverseTransliterator() {} public: /** * @brief Translate a given state coded as a int from target alphabet to source alphabet. * * @param state A state in target alphabet. * @return The corresponding state in source alphabet. * @throw BadIntException If the state is not a proper state for target alphabet. * @throw Exception Other kind of error, depending on the implementation. */ virtual int reverse(int state) const throw (BadIntException, Exception) = 0; /** * @brief Translate a given state coded as a string from target alphabet to source alphabet. * * @param state A state in target alphabet. * @return The corresponding state in source alphabet. * @throw BadCharException If the state is not a proper state for target alphabet. * @throw Exception Other kind of error, depending on the implementation. 
*/ virtual std::string reverse(const std::string & state) const throw (BadCharException, Exception) = 0; /** * @brief Translate a whole sequence from target alphabet to source alphabet. * * @param sequence A sequence in target alphabet. * @return The corresponding sequence in source alphabet. * @throw AlphabetMismatchException If the sequence alphabet do not match the target alphabet. * @throw Exception Other kind of error, depending on the implementation. */ virtual Sequence * reverse(const Sequence & sequence) const throw (AlphabetMismatchException, Exception) = 0; }; /** * @brief Partial implementation of the Transliterator interface. */ class AbstractTransliterator: public virtual Transliterator { public: AbstractTransliterator() {} virtual ~AbstractTransliterator() {} public: virtual int translate(int state) const throw (BadIntException, Exception) = 0; virtual std::string translate(const std::string & state) const throw (BadCharException, Exception) = 0; virtual Sequence * translate(const Sequence & sequence) const throw (AlphabetMismatchException, Exception); }; /** * @brief Partial implementation of the ReverseTransliterator interface. */ class AbstractReverseTransliterator: public ReverseTransliterator, public AbstractTransliterator { public: AbstractReverseTransliterator() {} virtual ~AbstractReverseTransliterator() {} public: //These two redeclarations must be here because of the multiple inheritance. virtual const Alphabet * getSourceAlphabet() const = 0; virtual const Alphabet * getTargetAlphabet() const = 0; virtual int reverse(int state) const throw (BadIntException, Exception) = 0; virtual std::string reverse(const std::string & state) const throw (BadCharException, Exception) = 0; virtual Sequence * reverse(const Sequence & sequence) const throw (AlphabetMismatchException, Exception); }; } //end of namespace bpp. 
#endif //_TRANSLITERATOR_H_ bpp-seq-2.1.0/src/Bpp/Seq/SiteExceptions.h000644 000000 000000 00000006433 12147656566 020246 0ustar00rootroot000000 000000 // // File: SiteExceptions.h // Author : Julien Dutheil // Created on: dim mar 7 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SITEEXCEPTIONS_H_ #define _SITEEXCEPTIONS_H_ #include namespace bpp { class Site; /** * @brief The site exception base class. 
* * @see Exception */ class SiteException: public Exception { private: /** * @brief A pointer toward a site object. */ const Site* site_; public: // Class constructor /** * @brief Build a new SiteException object. * * @param text A message to be passed to the exception hierarchy. * @param s A const pointer toward the site that threw the exception. */ SiteException(const std::string& text, const Site* s = 0); SiteException(const SiteException& se): Exception(se), site_(se.site_) {} SiteException& operator=(const SiteException& se) { Exception::operator=(se); site_ = se.site_; return *this; } // Class destructor virtual ~SiteException() throw() {} public: /** * @brief Get the site that threw the exception. * * @return A const pointer toward the site. */ virtual const Site* getSite() const { return site_; }; }; /** * @brief Exception sent when a empty site is found. */ class EmptySiteException: public SiteException { public: EmptySiteException(const std::string& text, const Site* s = 0): SiteException(text, s) {} virtual ~EmptySiteException() throw() {} }; /** * @brief Exception sent when a site containing gap is found. */ class SiteWithGapException: public SiteException { public: SiteWithGapException(const std::string& text, const Site* s = 0): SiteException(text, s) {} virtual ~SiteWithGapException() throw() {} }; } //end of namespace bpp. #endif // _SITEEXCEPTIONS_H_ bpp-seq-2.1.0/src/Bpp/Seq/SequencePositionIterators.h000644 000000 000000 00000015064 12147656566 022472 0ustar00rootroot000000 000000 // // File: SequencePositionIterators.h // Author: Sylvain Gaillard // Created: 23/06/2009 10:35:28 // /* Copyright or © or Copr. Bio++ Development Team, (June 23, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEPOSITIONITERATORS_H_ #define _SEQUENCEPOSITIONITERATORS_H_ // from STL #include #include "Sequence.h" namespace bpp { /** * @brief Loop over a Sequence. * * This is the SequencePositionIterator interface. * * @author Sylvain Gaillard */ class SequencePositionIterator { public: SequencePositionIterator() {} virtual ~SequencePositionIterator() {} public: /** * @brief Get the actual position of the iterator in the Sequence. */ virtual unsigned int getPosition() const = 0; /** * @brief Set the position of the iterator. * @param pos The position on the Sequence */ virtual void setPosition(unsigned int pos) = 0; /** * @brief Get the numerical value of the Sequence at current position. 
*/ virtual int getValue() const = 0; /** * @brief Get the textual value of the Sequence at current position. */ virtual std::string getChar() const = 0; virtual bool operator==(const SequencePositionIterator & it) const = 0; virtual bool operator!=(const SequencePositionIterator & it) const = 0; virtual SequencePositionIterator & operator+=(int i) = 0; virtual SequencePositionIterator & operator-=(int i) = 0; virtual SequencePositionIterator& operator++() = 0; /** * @brief Tells if there is more positions in the Sequence. * @return true if there is more positions in the Sequence */ virtual bool hasMorePositions() const = 0; /** * @brief Get the Sequence on which the iterator loops. * @return A reference toward the Sequence object. */ virtual const Sequence & getSequence() const = 0; }; /** * @brief Partial implementation of the SequencePositionIterator interface. * * @author Sylvain Gaillard */ class AbstractSequencePositionIterator : public virtual SequencePositionIterator { private: const Sequence* sequence_; unsigned int currentPosition_; public: AbstractSequencePositionIterator(const Sequence& seq, unsigned int pos = 0) : sequence_(&seq), currentPosition_(pos) {} AbstractSequencePositionIterator(const AbstractSequencePositionIterator& aspi) : sequence_(aspi.sequence_), currentPosition_(aspi.currentPosition_) {} AbstractSequencePositionIterator& operator=(const AbstractSequencePositionIterator& aspi) { sequence_ = aspi.sequence_; currentPosition_ = aspi.currentPosition_; return *this; } virtual ~AbstractSequencePositionIterator() {} public: /** * @name Comparison operators * * @{ */ bool operator==(const SequencePositionIterator& it) const; bool operator!=(const SequencePositionIterator& it) const; /** @} */ unsigned int getPosition() const; void setPosition(unsigned int pos); int getValue() const; std::string getChar() const; const Sequence& getSequence() const; }; /** * @brief Loop over all positions in a Sequence * * This is the simplest implementation of 
SequencePositionIterator. * It just loops over all positions of a Sequence. * * @code * Sequence seq = Sequence("seq1", "ATTCGATCCG-G", &AlphabetTools::DNA_ALPHABET); * for (SimpleSequencePositionIterator it(seq) ; it.hasMorePositions() ; ++it) { * cout << it.getPosition() << " : " << it.getValue() << " (" << it.getChar() << ")" << endl; * } * @endcode * * @author Sylvain Gaillard */ class SimpleSequencePositionIterator: public AbstractSequencePositionIterator { public: /** * @name Constructors and destructor * * @{ */ /** * @brief General constructor * * @param seq A reference toward the Sequence object we want to loop over * @param pos Optional integer where to start on the Sequence object * */ SimpleSequencePositionIterator(const Sequence& seq, unsigned int pos = 0): AbstractSequencePositionIterator(seq, pos) {} /** * @brief Copie constructor. * * @param it A reference toward a SequencePositionIterator */ SimpleSequencePositionIterator(const SequencePositionIterator& it); virtual ~SimpleSequencePositionIterator() {} /** @} */ public: /** * @name Operators * * @{ */ SimpleSequencePositionIterator & operator++(); virtual SimpleSequencePositionIterator operator++(int i); SimpleSequencePositionIterator & operator+=(int i); SimpleSequencePositionIterator & operator-=(int i); virtual SimpleSequencePositionIterator operator+(int i) const; virtual SimpleSequencePositionIterator operator-(int i) const; /** @} */ bool hasMorePositions() const; }; } #endif //_SEQUENCEPOSITIONITERATORS_H_ bpp-seq-2.1.0/src/Bpp/Seq/DNAToRNA.cpp000644 000000 000000 00000005164 12147656566 017101 0ustar00rootroot000000 000000 // // File: DNAToRNA.cpp // Created by: Julien Dutheil // Created on: Sun Oct 12 14:39:29 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DNAToRNA.h" using namespace bpp; using namespace std; /******************************************************************************/ int DNAToRNA::translate(int state) const throw (BadIntException) { dna_->intToChar(state); return state; } /******************************************************************************/ string DNAToRNA::translate(const std::string& state) const throw (BadCharException) { int i = dna_->charToInt(state); return rna_->intToChar(i); } /******************************************************************************/ int DNAToRNA::reverse(int state) const throw (BadIntException) { rna_->intToChar(state); return state; } /******************************************************************************/ string DNAToRNA::reverse(const std::string& state) const throw (BadCharException) { int i = rna_->charToInt(state); return dna_->intToChar(i); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SymbolListTools.cpp000644 000000 000000 00000015762 12147656566 020762 0ustar00rootroot000000 000000 // // File: SymbolListTools.cpp // Created by: Julien Dutheil // Created on: Wed Apr 9 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SymbolListTools.h" #include "Alphabet/AlphabetTools.h" #include //From the STL: #include using namespace std; using namespace bpp; void SymbolListTools::getCounts(const SymbolList& list, map& counts, bool resolveUnknowns) { if (!resolveUnknowns) { for (vector::const_iterator seqit = list.getContent().begin(); seqit != list.getContent().end(); seqit++) counts[*seqit]++; } else { for (vector::const_iterator seqit = list.getContent().begin(); seqit != list.getContent().end(); seqit++) { vector alias = list.getAlphabet()->getAlias(*seqit); double n = (double)alias.size(); for (size_t j = 0; j < alias.size(); j++) counts[alias[j]] += 1./n ; } } } void SymbolListTools::getCounts(const SymbolList& list1, const SymbolList& list2, map< int, map >& counts, bool resolveUnknowns) throw (DimensionException) { if (list1.size() != list2.size()) throw DimensionException("SymbolListTools::getCounts: the two sites must have the same size.", list1.size(), list2.size()); if (!resolveUnknowns) { for (size_t i = 0; i < list1.size(); i++) counts[list1[i]][list2[i]]++; } else { for (size_t i = 0; i < list1.size(); i++) { vector alias1 = list1.getAlphabet()->getAlias(list1[i]); vector alias2 = 
list2.getAlphabet()->getAlias(list2[i]); double n1 = (double)alias1.size(); double n2 = (double)alias2.size(); for (size_t j = 0; j < alias1.size(); j++) for (size_t k = 0; k < alias2.size(); k++) counts[alias1[j]][alias2[k]] += 1./(n1*n2) ; } } } void SymbolListTools::getFrequencies(const SymbolList& list, map& frequencies, bool resolveUnknowns) { double n = (double)list.size(); map counts; getCounts(list, counts, resolveUnknowns); for (map::iterator i = counts.begin(); i != counts.end(); i++) { frequencies[i->first] = i->second / n; } } void SymbolListTools::getFrequencies(const SymbolList& list1, const SymbolList& list2, map >& frequencies, bool resolveUnknowns) throw (DimensionException) { double n2 = (double)list1.size() * (double)list1.size(); map > counts; getCounts(list1, list2, counts, resolveUnknowns); for (map >::iterator i = counts.begin(); i != counts.end(); i++) for (map::iterator j = i->second.begin(); j != i->second.end(); j++) { frequencies[i->first][j->first] = j->second / n2; } } double SymbolListTools::getGCContent(const SymbolList& list, bool ignoreUnresolved, bool ignoreGap) throw (AlphabetException) { const Alphabet * alphabet = list.getAlphabet(); if (!AlphabetTools::isNucleicAlphabet(alphabet)) throw AlphabetException("SymbolListTools::getGCContent. 
Method only works on nucleotides.", alphabet); double gc = 0; double total = 0; for (size_t i = 0; i < list.size(); i++) { int state = list.getValue(i); if (state > -1) { // not a gap if (state == 1 || state == 2) { // G or C gc++; total++; } else if (state == 0 || state == 3) { // A, T or U total++; } else { // Unresolved character if (!ignoreUnresolved) { total++; switch(state) { case(7): gc++; break;// G or C case(4): gc+=0.5; break;// A or C case(5): gc+=0.5; break;// A or G case(6): gc+=0.5; break;// C or T case(9): gc+=0.5; break;// G or T case(10): gc+=2./3.; break;// A or C or G case(11): gc+=1./3.; break;// A or C or T case(12): gc+=1./3.; break;// A or G or T case(13): gc+=2./3.; break;// C or G or T case(14): gc+=0.5; break;// A or C or G or T } } } } else { if (!ignoreGap) total++; } } return total != 0 ? gc/total : 0; } size_t SymbolListTools::getNumberOfDistinctPositions(const SymbolList& l1, const SymbolList& l2) throw (AlphabetMismatchException) { if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet()); size_t n = min(l1.size(), l2.size()); size_t count = 0; for (size_t i = 0; i < n; i++) { if (l1[i] != l2[i]) count++; } return count; } size_t SymbolListTools::getNumberOfPositionsWithoutGap(const SymbolList& l1, const SymbolList& l2) throw (AlphabetMismatchException) { if (l1.getAlphabet() -> getAlphabetType() != l2.getAlphabet() -> getAlphabetType()) throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet()); size_t n = min(l1.size(), l2.size()); size_t count = 0; for (size_t i = 0; i < n; i++) { if (l1[i] != -1 && l2[i] != -1) count++; } return count; } void SymbolListTools::changeGapsToUnknownCharacters(SymbolList& l) { int unknownCode = l.getAlphabet()->getUnknownCharacterCode(); for (size_t i = 0; i < l.size(); i++) { if 
(l.getAlphabet()->isGap(l[i])) l[i] = unknownCode; } } void SymbolListTools::changeUnresolvedCharactersToGaps(SymbolList& l) { int gapCode = l.getAlphabet()->getGapCharacterCode(); for (size_t i = 0; i < l.size(); i++) { if (l.getAlphabet()->isUnresolved(l[i])) l[i] = gapCode; } } bpp-seq-2.1.0/src/Bpp/Seq/Container/CompressedVectorSiteContainer.h000644 000000 000000 00000031074 12147656566 025200 0ustar00rootroot000000 000000 // // File: CompressedVectorSiteContainer.h // Created by: Julien Dutheil // Created on: Wed Dec 16 12:08 2009 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _COMPRESSEDVECTORSITECONTAINER_H_
#define _COMPRESSEDVECTORSITECONTAINER_H_

#include "../Site.h"
#include "SiteContainer.h"
#include "AbstractSequenceContainer.h"
#include "AlignedSequenceContainer.h"
#include "OrderedSequenceContainer.h"
#include

// From the STL library:
#include
#include
#include

// NOTE(review): the header names of the bare #include lines above, and the
// template arguments of the std::vector declarations below, are missing in
// this copy of the file -- restore them from upstream before compiling.

namespace bpp
{

/**
 * @brief A low memory, yet restricted, version of the VectorSiteContainer class.
 *
 * This implementation is very similar to VectorSiteContainer, but identical sites
 * are stored only once, which significantly reduces memory usage in the case of
 * containers where the number of sites is large compared to the number of sequences.
 * Site access is as fast as in the standard VectorSiteContainer class, but site
 * addition takes more time, as the new site must first be compared to the existing set.
 * A major restriction of this container is that you can't add or remove sequences.
 * The number of sequences is fixed after the first site has been added.
 *
 * @warning Since the data is compressed, the sites given as input are modified. The
 * major practical consequence is that the 'position' attribute of sites will be lost.
 * Instead, the position will correspond to the position in the compressed container.
 * In addition, this container may lead to unexpected behavior if used with derived
 * classes of Site. Use with care then...
 *
 * @see Sequence, Site, VectorSiteContainer
 */
class CompressedVectorSiteContainer :
  public AbstractSequenceContainer,   // This container implements the SequenceContainer interface
                                      // and uses the AbstractSequenceContainer adapter.
  public virtual SiteContainer        // This container is a SiteContainer.
{
protected:
  std::vector sites_; // A set of unique sites.
  std::vector index_; // For all sites, give the actual position in the set.
  std::vector names_; // Sequence names; its size is what getNumberOfSequences() returns.
  std::vector comments_; // Sequences comments.
  mutable std::vector sequences_; // To store pointer toward sequences retrieved (cf. AlignedSequenceContainer).

public:
  /**
   * @brief Build a new container from a set of sites.
   *
   * @param vs A std::vector of sites.
   * @param alpha The common alphabet for all sites.
   * @throw Exception If sites differ in size or in alphabet.
   */
  CompressedVectorSiteContainer(const std::vector& vs, const Alphabet* alpha) throw (Exception);
  /**
   * @brief Build a new empty container with specified size.
   *
   * @param size Number of sequences in the container.
   * @param alpha The alphabet for this container.
   */
  CompressedVectorSiteContainer(size_t size, const Alphabet* alpha);
  /**
   * @brief Build a new empty container with specified sequence names.
   *
   * @param names Sequence names. This will set the number of sequences in the container.
   * @param alpha The alphabet for this container.
   */
  CompressedVectorSiteContainer(const std::vector& names, const Alphabet* alpha);

  /**
   * @brief Build a new empty container.
   *
   * @param alpha The alphabet for this container.
   */
  CompressedVectorSiteContainer(const Alphabet* alpha);

  CompressedVectorSiteContainer(const CompressedVectorSiteContainer& vsc);
  CompressedVectorSiteContainer(const SiteContainer& sc);

  CompressedVectorSiteContainer& operator=(const CompressedVectorSiteContainer& vsc);
  CompressedVectorSiteContainer& operator=(const SiteContainer& sc);

  /**
   * @brief Destructor; calls clear().
   */
  virtual ~CompressedVectorSiteContainer() { clear(); }

public:
  /**
   * @name The Clonable interface.
   *
   * @{
   */
  CompressedVectorSiteContainer* clone() const { return new CompressedVectorSiteContainer(*this); }
  /** @} */

  /**
   * @name The SiteContainer interface implementation:
   *
   * @{
   */
  const Site& getSite(size_t siteIndex) const throw (IndexOutOfBoundsException);
  void setSite(size_t siteIndex, const Site& site, bool checkPosition = false) throw (Exception);
  Site* removeSite(size_t siteIndex) throw (IndexOutOfBoundsException);
  void deleteSite(size_t siteIndex) throw (IndexOutOfBoundsException);
  void deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException);
  void addSite(const Site& site, bool checkPosition = false) throw (Exception);
  // The 'position' argument is ignored: the call is forwarded to addSite(site, checkPosition).
  void addSite(const Site& site, int position, bool checkPosition = false) throw (Exception)
  {
    addSite(site, checkPosition);
  }
  void addSite(const Site& site, size_t siteIndex, bool checkPosition = false) throw (Exception);
  // The 'position' argument is ignored: the call is forwarded to addSite(site, siteIndex, checkPosition).
  void addSite(const Site& site, size_t siteIndex, int position, bool checkPosition = false) throw (Exception)
  {
    addSite(site, siteIndex, checkPosition);
  }
  // The number of sites is the size of the index, not the size of the set of unique sites.
  size_t getNumberOfSites() const { return index_.size(); }
  void reindexSites();
  Vint getSitePositions() const;
  /** @} */

  // These methods are implemented for this class:

  /**
   * @name The SequenceContainer interface.
   *
   * @{
   */
  void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException);

  // Method to get a sequence object from sequence container
  const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException);
  const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException);
  bool hasSequence(const std::string& name) const;

  // Methods to get the position of a sequence in the sequence container from its name
  // This method is used by delete and remove methods
  size_t getSequencePosition(const std::string& name) const throw (SequenceNotFoundException);

  // Not supported by this container: always throws NotImplementedException.
  Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::removeSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::removeSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::deleteSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  void deleteSequence(const std::string& name) throw (SequenceNotFoundException, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::deleteSequence.");
  }

  // The number of sequences is the number of stored names.
  size_t getNumberOfSequences() const { return names_.size(); }

  std::vector getSequencesNames() const;

  void setSequencesNames(const std::vector& names, bool checkNames = true) throw (Exception);

  void clear();

  CompressedVectorSiteContainer* createEmptyContainer() const;

  // Checked access by sequence name: elementIndex is validated here and the
  // sequence is looked up with getSequencePosition(). The site is reached
  // through index_, so the returned reference points into the stored unique site.
  // NOTE(review): the messages below name "VectorSiteContainer::operator(...)",
  // not this class/method, and the upper bound getNumberOfSites() - 1 wraps
  // around when the container holds no site.
  int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException)
  {
    if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(std::string, size_t).", elementIndex, 0, getNumberOfSites() - 1);
    return (*sites_[index_[elementIndex]])[getSequencePosition(sequenceName)];
  }
  // Const counterpart of the checked access by sequence name.
  const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException)
  {
    if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(std::string, size_t).", elementIndex, 0, getNumberOfSites() - 1);
    return (*sites_[index_[elementIndex]])[getSequencePosition(sequenceName)];
  }
  // Access by sequence name without any check on elementIndex.
  int& operator()(const std::string& sequenceName, size_t elementIndex)
  {
    return (*sites_[index_[elementIndex]])[getSequencePosition(sequenceName)];
  }
  // Const counterpart, also without any check on elementIndex.
  const int& operator()(const std::string& sequenceName, size_t elementIndex) const
  {
    return (*sites_[index_[elementIndex]])[getSequencePosition(sequenceName)];
  }

  // Checked access by indices: both sequenceIndex and elementIndex are validated.
  // NOTE(review): same remarks on the error messages as for the name-based valueAt().
  int& valueAt(size_t sequenceIndex, size_t elementIndex) throw (IndexOutOfBoundsException)
  {
    if (sequenceIndex >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(size_t, size_t).", sequenceIndex, 0, getNumberOfSequences() - 1);
    if (elementIndex  >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(size_t, size_t).", elementIndex, 0, getNumberOfSites() - 1);
    return (*sites_[index_[elementIndex]])[sequenceIndex];
  }
  // Const counterpart of the checked access by indices.
  const int& valueAt(size_t sequenceIndex, size_t elementIndex) const throw (IndexOutOfBoundsException)
  {
    if (sequenceIndex >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(size_t, size_t).", sequenceIndex, 0, getNumberOfSequences() - 1);
    if (elementIndex  >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::operator(size_t, size_t).", elementIndex, 0, getNumberOfSites() - 1);
    return (*sites_[index_[elementIndex]])[sequenceIndex];
  }
  // Access by indices without any bounds check.
  int& operator()(size_t sequenceIndex, size_t elementIndex)
  {
    return (*sites_[index_[elementIndex]])[sequenceIndex];
  }
  // Const counterpart, also without any bounds check.
  const int& operator()(size_t sequenceIndex, size_t elementIndex) const
  {
    return (*sites_[index_[elementIndex]])[sequenceIndex];
  }
  /** @} */

  // Not supported by this container: always throws NotImplementedException.
  void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::addSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::addSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  void setSequence(const std::string& name, const Sequence& sequence, bool checkName) throw (Exception, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::setSequence.");
  }

  // Not supported by this container: always throws NotImplementedException.
  void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName) throw (Exception, NotImplementedException)
  {
    //Implementing this function would involve (partially) decompressing the data...
    throw NotImplementedException("CompressedVectorSiteContainer::setSequence.");
  }

protected:
  /**
   * @return The position of the site in the compressed set. If the site is not found,
   * this will return the number of sites in the compressed set.
   */
  size_t getSiteIndex_(const Site& site);
};

} // end of namespace bpp.

#endif // _COMPRESSEDVECTORSITECONTAINER_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainer.h000644 000000 000000 00000015517 12147656566 021774 0ustar00rootroot000000 000000
//
// File SiteContainer.h
// Created by: Guillaume Deuchst
//             Julien Dutheil
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _SITECONTAINER_H_
#define _SITECONTAINER_H_

#include "../Site.h"
#include "OrderedSequenceContainer.h"
#include "SequenceContainerExceptions.h"
#include
#include

// From the STL:
#include

// NOTE(review): the header names of the bare #include lines above are missing
// in this copy of the file -- restore them from upstream before compiling.

namespace bpp
{

/**
 * @brief The SiteContainer interface.
 *
 * Containers implementing the SiteContainer interface deal with aligned sequences.
 * This interface provides methods to retrieve, add or set sites in the alignment.
 * As for SequenceContainers, the maintenance of Sites is up to the container.
 * All site objects are cloned before being added and retrieved.
 * All sites stored are deleted in the destructor of the container or after having called the deleteSite() method.
 */
class SiteContainer :
  public virtual OrderedSequenceContainer
{
  public:
    SiteContainer() {}
    virtual ~SiteContainer() {}

    SiteContainer* clone() const = 0;

  public:

    /**
     * @brief Get a site from the container.
     *
     * @param siteIndex The position of the site in the container.
     * @return A site object corresponding to site i in the alignment.
     * @throw IndexOutOfBoundsException If the specified site does not exist.
     */
    virtual const Site& getSite(size_t siteIndex) const throw (IndexOutOfBoundsException) = 0;

    /**
     * @brief Set a site in the container.
     *
     * @param siteIndex     The position of the site in the container.
     * @param site          The site to set.
     * @param checkPosition Look if the position of the new site matches a position attribute in the container.
     * @throw Exception If the specified site does not exist or is not correct.
     */
    virtual void setSite(size_t siteIndex, const Site& site, bool checkPosition) throw (Exception) = 0;

    /**
     * @brief Add a site in the container.
     *
     * @param site          The site to add.
     * @param checkPosition Look if the position of the new site matches a position attribute in the container.
     * @throw Exception If the specified site does not exist or is not correct.
     */
    virtual void addSite(const Site& site, bool checkPosition) throw (Exception) = 0;

    /**
     * @brief Add a site in the container.
     *
     * @param site          The site to add.
     * @param position      The new position of the site, to supersede the one in 'site'.
     * @param checkPosition Look if the position of the new site matches a position attribute in the container.
     * @throw Exception If the specified site does not exist or is not correct.
     */
    virtual void addSite(const Site& site, int position, bool checkPosition) throw (Exception) = 0;

    /**
     * @brief Add a site in the container.
     *
     * @param site          The site to add.
     * @param siteIndex     The position where to insert the site.
     * @param checkPosition Look if the position of the new site matches a position attribute in the container.
     * @throw Exception If the specified site does not exist or is not correct.
     */
    virtual void addSite(const Site& site, size_t siteIndex, bool checkPosition) throw (Exception) = 0;

    /**
     * @brief Add a site in the container.
     *
     * @param site          The site to add.
     * @param siteIndex     The position where to insert the site.
     * @param position      The new position of the site, to supersede the one in 'site'.
     * @param checkPosition Look if the position of the new site matches a position attribute in the container.
     * @throw Exception If the specified site does not exist or is not correct.
     */
    virtual void addSite(const Site& site, size_t siteIndex, int position, bool checkPosition) throw (Exception) = 0;

    /**
     * @brief Remove a site from the container.
     *
     * The site is not deleted, a pointer toward it is returned.
     *
     * @param siteIndex The position of the site in the container.
     * @return A pointer toward site i in the alignment.
     * @throw IndexOutOfBoundsException If the specified site does not exist.
     */
    virtual Site* removeSite(size_t siteIndex) throw (IndexOutOfBoundsException, Exception) = 0;

    /**
     * @brief Delete a site in the container.
     *
     * @param siteIndex The position of the site in the container.
     * @throw IndexOutOfBoundsException If the specified site does not exist.
     */
    virtual void deleteSite(size_t siteIndex) throw (IndexOutOfBoundsException, Exception) = 0;

    /**
     * @brief Delete a continuous range of sites in the container.
     *
     * @param siteIndex The position of the first site in the container.
     * @param length The length of the region to delete, starting at position siteIndex.
     * @throw IndexOutOfBoundsException If the specified range is not valid.
     */
    virtual void deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException, Exception) = 0;

    /**
     * @brief Get the number of sites in the container.
     *
     * @return The number of sites in the container.
     */
    virtual size_t getNumberOfSites() const = 0;

    /**
     * @brief Set all position attributes.
     */
    virtual void reindexSites() = 0;

    /**
     * @brief Get all position attributes of sites.
     *
     * @return A vector with all site positions.
     */
    virtual Vint getSitePositions() const = 0;
};

} // end of namespace bpp.

#endif // _SITECONTAINER_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/MapSequenceContainer.cpp000644 000000 000000 00000042024 12147656566 023622 0ustar00rootroot000000 000000
//
// File MapSequenceContainer.cpp
// Authors : Guillaume Deuchst
//           Julien Dutheil
//           Sylvain Gaillard
// Last modification : Monday July 19 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "MapSequenceContainer.h" #include using namespace bpp; using namespace std; /******************************************************************************/ MapSequenceContainer::MapSequenceContainer(const map& ms, const Alphabet* alpha) : AbstractSequenceContainer(alpha), sequences_() { for (map::const_iterator it = ms.begin(); it != ms.end(); it++) { addSequence(it->first, *it->second); } } /******************************************************************************/ MapSequenceContainer::MapSequenceContainer(const MapSequenceContainer& msc) : AbstractSequenceContainer(msc.getAlphabet()), sequences_() { for (unsigned int i = 0; i < msc.getNumberOfSequences(); i++) addSequence(msc.getKey(i), msc.getSequence(i), false); } /******************************************************************************/ MapSequenceContainer& MapSequenceContainer::operator=(const MapSequenceContainer& msc) { clear(); AbstractSequenceContainer::operator=(msc); // Sequences insertion vector keys = msc.getKeys(); for (unsigned int i = 0 ; i < getNumberOfSequences(); i++) { addSequence(keys[i], msc.getSequence(i), false); } return * this; } 
/******************************************************************************/ MapSequenceContainer::~MapSequenceContainer() { clear(); } /******************************************************************************/ const Sequence& MapSequenceContainer::getSequence(size_t i) const throw (IndexOutOfBoundsException) { // Specified sequence existence verification if (i < sequences_.size()) { map::const_iterator it = sequences_.begin(); for (unsigned int j = 0; j < i; j++) it++; return *it->second; } throw IndexOutOfBoundsException("MapSequenceContainer::getSequence", i, 0, sequences_.size() - 1); } /******************************************************************************/ const Sequence& MapSequenceContainer::getSequence(const string& name) const throw (SequenceNotFoundException) { // Specified sequence name research into all sequences for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) if (it->second->getName() == name) return *it->second; throw SequenceNotFoundException("MapSequenceContainer::getSequence", name); } /******************************************************************************/ bool MapSequenceContainer::hasSequence(const string& name) const { // Specified sequence name research into all sequences for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) if (it->second->getName() == name) return true; return false; } /******************************************************************************/ Sequence& MapSequenceContainer::getSequence_(size_t i) throw (IndexOutOfBoundsException) { if (i >= sequences_.size()) throw IndexOutOfBoundsException("MapSequenceContainer::getSequence", i, 0, sequences_.size() - 1); map::iterator it = sequences_.begin(); for (size_t j = 0; j < i; j++) it++; return *it->second; } /******************************************************************************/ Sequence& MapSequenceContainer::getSequence_(const string& name) throw (SequenceNotFoundException) { // 
Specified sequence name research into all sequences for (map::iterator it = sequences_.begin(); it != sequences_.end(); it++) if (it->second->getName() == name) return *it->second; throw SequenceNotFoundException("MapSequenceContainer::getSequence", name); } /******************************************************************************/ const Sequence& MapSequenceContainer::getSequenceByKey(const string& key) const throw (SequenceNotFoundException) { map::const_iterator it = sequences_.find(key); if (it == sequences_.end()) throw SequenceNotFoundException("MapSequenceContainer::getSequenceByKey", key); return *it->second; } /******************************************************************************/ size_t MapSequenceContainer::getSequencePosition(const string& name) const throw (SequenceNotFoundException) { // Specified sequence name research into all sequences int pos = 0; for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == name) return pos; pos++; } throw SequenceNotFoundException("MapSequenceContainer::getSequencePosition", name); } /******************************************************************************/ void MapSequenceContainer::setSequence(size_t i, const Sequence& sequence, bool checkNames) throw (IndexOutOfBoundsException) { // Sequence's name existence checking if (checkNames) { size_t j = 0; // For all names in map : throw exception if name already exists for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == sequence.getName()) if (j != i) throw Exception("MapSequenceContainer::setSequence : Sequence's name already exists in container"); j++; } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // Delete old sequence delete sequences_[getKey(i)]; // New sequence insertion in sequence container 
sequences_[getKey(i)] = dynamic_cast(sequence.clone()); } else throw AlphabetMismatchException("MapSequenceContainer::setSequence", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ void MapSequenceContainer::setSequence(const string& name, const Sequence& sequence, bool checkNames) throw (SequenceNotFoundException) { // Sequence's name existence checking if (checkNames) { // For all names in map : throw exception if name already exists for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == name) if (it->second->getName() != name) throw Exception("MapSequenceContainer::setSequence : Sequence's name already exists in container"); } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // Delete old sequence delete sequences_[name]; // New sequence insertion in sequence container sequences_[name] = dynamic_cast(sequence.clone()); } else throw AlphabetMismatchException("MapSequenceContainer::setSequence", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ void MapSequenceContainer::setSequenceByKey(const string& key, const Sequence& sequence, bool checkNames) throw (SequenceNotFoundException) { // Sequence's name existence checking if (checkNames) { // For all names in map : throw exception if name already exists for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == sequence.getName()) if (it->first != key) throw Exception("MapSequenceContainer::setSequenceByKey : Sequence's name already exists in container"); } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // Delete old sequence 
delete sequences_[key]; // New sequence insertion in sequence container sequences_[key] = dynamic_cast(sequence.clone()); } else throw AlphabetMismatchException("MapSequenceContainer::setSequenceByKey", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ Sequence* MapSequenceContainer::removeSequence(size_t i) throw (IndexOutOfBoundsException) { if (i >= sequences_.size()) throw IndexOutOfBoundsException("MapSequenceContainer::removeSequence", i, 0, sequences_.size() - 1); map::iterator it = sequences_.begin(); for (size_t j = 0; j < i; j++) it++; Sequence* old = it->second; sequences_.erase(it); return old; } /******************************************************************************/ Sequence* MapSequenceContainer::removeSequence(const string& name) throw (SequenceNotFoundException) { for (map::iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == name) { Sequence* old = it->second; sequences_.erase(it); return old; } } throw SequenceNotFoundException("MapSequenceContainer::removeSequence", name); } /******************************************************************************/ Sequence* MapSequenceContainer::removeSequenceByKey(const string& key)throw (SequenceNotFoundException) { map::iterator it = sequences_.find(key); if (it == sequences_.end()) throw SequenceNotFoundException("MapSequenceContainer::removeSequenceByKey", key); Sequence* old = it->second; sequences_.erase(key); return old; } /******************************************************************************/ void MapSequenceContainer::deleteSequence(size_t i) throw (IndexOutOfBoundsException) { if (i >= sequences_.size()) throw IndexOutOfBoundsException("MapSequenceContainer::deleteSequence", i, 0, sequences_.size() - 1); map::iterator it = sequences_.begin(); for (size_t j = 0; j < i; j++) it++; delete it->second; sequences_.erase(it); } 
/******************************************************************************/ void MapSequenceContainer::deleteSequence(const string& name) throw (SequenceNotFoundException) { for (map::iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == name) { delete it->second; sequences_.erase(it); return; } } throw SequenceNotFoundException("MapSequenceContainer::deleteSequence", name); } /******************************************************************************/ void MapSequenceContainer::deleteSequenceByKey(const string& key) throw (SequenceNotFoundException) { map::iterator it = sequences_.find(key); if (it == sequences_.end()) throw SequenceNotFoundException("MapSequenceContainer::deleteSequenceByKey", key); delete it->second; sequences_.erase(key); } /******************************************************************************/ void MapSequenceContainer::addSequence(const string& key, const Sequence& sequence, bool checkNames) throw (Exception) { // Sequence's name existence checking if (checkNames) { // For all names in map : throw exception if name already exists for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) { if (it->second->getName() == sequence.getName()) throw Exception("MapSequenceContainer::addSequence: Sequence '" + sequence.getName() + ", already exists in container"); } } // Check if the key is not used for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) if (key == it->first) throw Exception("MapSequenceContainer::addSequence: key already in use. 
(" + key + ")"); // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) sequences_.insert(make_pair(key, dynamic_cast(sequence.clone()))); else throw AlphabetMismatchException("MapSequenceContainer::addSequence", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ vector MapSequenceContainer::getKeys() const { vector keys; for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) keys.push_back(it->first); return keys; } /******************************************************************************/ string MapSequenceContainer::getKey(size_t pos) const throw (IndexOutOfBoundsException) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("MapSequenceContainer::getKey", pos, 0, sequences_.size() - 1); map::const_iterator it = sequences_.begin(); for (size_t i = 0; i < pos; i++) it++; return it->first; } /******************************************************************************/ string MapSequenceContainer::getKey(const string& name) const throw (SequenceNotFoundException) { try { return getKey(getSequencePosition(name)); } catch (SequenceNotFoundException & snfe) { throw SequenceNotFoundException("MapSequenceContainer::getKey", snfe.getSequenceId()); } } /******************************************************************************/ void MapSequenceContainer::setComments(size_t pos, const Comments& comments) throw (IndexOutOfBoundsException) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("MapSequenceContainer::setComments", pos, 0, sequences_.size() - 1); map::iterator it = sequences_.begin(); for (size_t i = 0 ; i < pos ; i++) it++; it->second->setComments(comments); } /******************************************************************************/ vector MapSequenceContainer::getSequencesNames() const { vector names; 
for (map::const_iterator it = sequences_.begin(); it != sequences_.end(); it++) names.push_back(it->second->getName()); return names; } /******************************************************************************/ void MapSequenceContainer::setSequencesNames(const vector& names, bool checkNames) throw (Exception) { if (names.size() != getNumberOfSequences()) throw IndexOutOfBoundsException("MapSequenceContainer::setSequenceNames : bad number of names", names.size(), getNumberOfSequences(), getNumberOfSequences()); if (checkNames) { // check if there is no repeat names in teh vector for (size_t i = 0 ; i < names.size() ; i++) for (unsigned int j = 0 ; j < i ; j++) if (names[j] == names[i]) throw Exception("MapSequenceContainer::setSequencesNames: Sequence's name already exists in container"); } map::iterator it = sequences_.begin(); for (size_t i = 0 ; i < names.size() ; i++) { it->second->setName(names[i]); it++; } } /******************************************************************************/ void MapSequenceContainer::clear() { // Delete sequences for (map::iterator it = sequences_.begin(); it != sequences_.end(); it++) delete it->second; // Delete all sequence pointers sequences_.clear(); } /******************************************************************************/ MapSequenceContainer* MapSequenceContainer::createEmptyContainer() const { MapSequenceContainer* msc = new MapSequenceContainer(getAlphabet()); msc->setGeneralComments(getGeneralComments()); return msc; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainerTools.cpp000644 000000 000000 00000016507 12147656566 024214 0ustar00rootroot000000 000000 // // File: SequenceContainerTools.cpp // Created by: Julien Dutheil // Created on: Sat Oct 4 09:18:34 2003 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "SequenceContainerTools.h" #include "VectorSequenceContainer.h" #include "../Alphabet/CodonAlphabet.h" using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ SequenceContainer* SequenceContainerTools::createContainerOfSpecifiedSize(const Alphabet* alphabet, size_t size) { VectorSequenceContainer* vsc = new VectorSequenceContainer(alphabet); for (size_t i = 0; i < size; ++i) { vsc->addSequence(BasicSequence("" + i, "", alphabet), false); } return vsc; } /******************************************************************************/ SequenceContainer* SequenceContainerTools::createContainerWithSequenceNames( const Alphabet* alphabet, const vector& seqNames) throw (Exception) { SequenceContainer* sc = createContainerOfSpecifiedSize(alphabet, seqNames.size()); sc->setSequencesNames(seqNames, true); return sc; } /******************************************************************************/ void SequenceContainerTools::getSelectedSequences( const OrderedSequenceContainer& sequences, const SequenceSelection& selection, SequenceContainer& outputCont) throw (Exception) { bool checkNames = outputCont.getNumberOfSequences() > 0; for (size_t i = 0; i < selection.size(); i++) { outputCont.addSequence(sequences.getSequence(selection[i]), checkNames); } } /******************************************************************************/ void SequenceContainerTools::getSelectedSequences( const SequenceContainer& sequences, const std::vector& selection, SequenceContainer& outputCont, bool strict) throw (Exception) { bool checkNames = outputCont.getNumberOfSequences() > 0; for (size_t i = 0; i < selection.size(); i++) { if (strict) { outputCont.addSequence(sequences.getSequence(selection[i]), checkNames); } else { if (sequences.hasSequence(selection[i])) outputCont.addSequence(sequences.getSequence(selection[i]), checkNames); } } } 
/******************************************************************************/ void SequenceContainerTools::keepOnlySelectedSequences( OrderedSequenceContainer& sequences, const SequenceSelection& selection) { vector names = sequences.getSequencesNames(); for (size_t i = 0; i < names.size(); i++) { // We need to do this because after removal the indices will not be the same! // another solution would be to sort decreasingly the indices... bool test = false; for (size_t j = 0; j < selection.size() && !test; j++) { test = (selection[j] == i); } if (!test) sequences.deleteSequence(names[i]); // WARNING: what if selection contains several times the same indice? ... } } /******************************************************************************/ bool SequenceContainerTools::sequencesHaveTheSameLength(const SequenceContainer& sequences) { vector seqNames = sequences.getSequencesNames(); if (seqNames.size() <= 1) return true; size_t length = sequences.getSequence(seqNames[0]).size(); for (size_t i = 1; i < seqNames.size(); i++) { if (sequences.getSequence(seqNames[i]).size() != length) return false; } return true; } /******************************************************************************/ void SequenceContainerTools::getFrequencies(const SequenceContainer& sequences, std::map& f, double pseudoCount) { double n = 0; vector names = sequences.getSequencesNames(); for (size_t j = 0; j < names.size(); j++) { vector seq = sequences.getContent(names[j]); for (size_t i = 0; i < seq.size(); i++) { f[seq[i]]++; } n += static_cast(seq.size()); } if (pseudoCount != 0) { const Alphabet* pA = sequences.getAlphabet(); for (int i = 0; i < static_cast(pA->getSize()); i++) { f[i] += pseudoCount; } n += pseudoCount * static_cast(pA->getSize()); } for (map::iterator i = f.begin(); i != f.end(); i++) { i->second = i->second / n; } } /******************************************************************************/ void SequenceContainerTools::getCounts(const SequenceContainer& 
sequences, std::map& f) { size_t n = 0; vector names = sequences.getSequencesNames(); for (size_t j = 0; j < names.size(); j++) { vector seq = sequences.getContent(names[j]); for (size_t i = 0; i < seq.size(); i++) { f[seq[i]]++; } n += seq.size(); } } /******************************************************************************/ SequenceContainer* SequenceContainerTools::getCodonPosition(const SequenceContainer& sequences, size_t pos) throw (AlphabetException) { const CodonAlphabet* calpha = dynamic_cast(sequences.getAlphabet()); if (!calpha) throw AlphabetException("SequenceContainerTools::getCodonPosition. Input sequences should be of type codon."); vector names = sequences.getSequencesNames(); VectorSequenceContainer* newcont = new VectorSequenceContainer(calpha->getNucleicAlphabet()); for (size_t j = 0; j < names.size(); j++) { vector seq = sequences.getContent(names[j]); vector newseq(seq.size()); for (size_t i = 0; i < seq.size(); i++) { newseq[i] = calpha->getNPosition(seq[i], pos); } BasicSequence s(names[j], newseq, sequences.getComments(names[j]), calpha->getNucleicAlphabet()); newcont->addSequence(s); } return newcont; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainerIterator.h000644 000000 000000 00000006076 12147656566 024352 0ustar00rootroot000000 000000 // // File: SequenceContainerIterator.h // Created by: Julien Dutheil // Created on: Tue Feb 26 14:34 2013 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCECONTAINERITERATOR_H_ #define _SEQUENCECONTAINERITERATOR_H_ #include "../Sequence.h" #include "../SequenceIterator.h" #include "OrderedSequenceContainer.h" namespace bpp { /** * @brief Partial implementation of the SequenceIterator interface, allowing to loop over an ordered sequence container. 
*/
class AbstractSequenceContainerIterator :
  public virtual ConstSequenceIterator
{
protected:
  // Container being iterated over. Only the pointer is held: nothing in this
  // class deletes it, and copies share the same container.
  const OrderedSequenceContainer* sequences_;
  // Current position of the iteration (presumably an index into the
  // container; the code using it is not in this header).
  int currentPosition_;

public:
  // Defined outside this header. NOTE(review): the parameter is called
  // "sites" although its type is a sequence container.
  AbstractSequenceContainerIterator(const OrderedSequenceContainer& sites);

  // Copying duplicates the container pointer and the current position.
  AbstractSequenceContainerIterator(const AbstractSequenceContainerIterator& asi) :
    sequences_(asi.sequences_),
    currentPosition_(asi.currentPosition_)
  {}

  // Assignment takes over the container pointer and position of the other iterator.
  AbstractSequenceContainerIterator& operator=(const AbstractSequenceContainerIterator& asi)
  {
    sequences_ = asi.sequences_;
    currentPosition_ = asi.currentPosition_;
    return *this;
  }

  // Empty body: the pointed-to container is left alone.
  virtual ~AbstractSequenceContainerIterator() {}
};

/**
 * @brief Loop over all sequences in a SequenceContainer.
 */
class SimpleSequenceContainerIterator: public AbstractSequenceContainerIterator
{
public:
  // Defined outside this header; takes the container to loop over.
  SimpleSequenceContainerIterator(const OrderedSequenceContainer& sites);
  virtual ~SimpleSequenceContainerIterator() {}

public:
  // Both are defined outside this header. Presumably nextSequence() returns
  // the next sequence and advances, and hasMoreSequences() tells whether any
  // is left; confirm against the implementation file.
  const Sequence* nextSequence();
  bool hasMoreSequences() const;
};

} //end of namespace bpp.

#endif //_SEQUENCECONTAINERITERATOR_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerTools.cpp000644 000000 000000 00000100250 12147656566 023335 0ustar00rootroot000000 000000 //
// File: SiteContainerTools.cpp
// Created by: Julien Dutheil
//             Sylvain Glémin
// Created on: Fri Dec 12 18:55:06 2003
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "SiteContainerTools.h" #include "SequenceContainerTools.h" #include "VectorSiteContainer.h" #include "SiteContainerIterator.h" #include "../SiteTools.h" #include "../Alphabet/AlphabetTools.h" #include "../SequenceTools.h" #include using namespace bpp; // From the STL: #include #include #include using namespace std; /******************************************************************************/ SiteContainer* SiteContainerTools::getSitesWithoutGaps(const SiteContainer& sites) { vector seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); NoGapSiteContainerIterator ngsi(sites); while (ngsi.hasMoreSites()) { noGapCont->addSite(*ngsi.nextSite()); } return noGapCont; } /******************************************************************************/ SiteContainer* SiteContainerTools::getCompleteSites(const SiteContainer& sites) { vector seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); CompleteSiteContainerIterator csi(sites); while (csi.hasMoreSites()) { noGapCont->addSite(*csi.nextSite()); } return noGapCont; } /******************************************************************************/ SiteContainer* SiteContainerTools::getSelectedSites( const SiteContainer& sequences, const SiteSelection& selection) { vector seqNames = sequences.getSequencesNames(); VectorSiteContainer* sc = new VectorSiteContainer(seqNames.size(), sequences.getAlphabet()); sc->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < selection.size(); i++) { sc->addSite(sequences.getSite(selection[i]), false); // We do not check names, we suppose that the container passed as an argument is correct. // WARNING: what if selection contains many times the same indice? ... 
} sc->setGeneralComments(sequences.getGeneralComments()); return sc; } /******************************************************************************/ const Sequence* SiteContainerTools::getConsensus(const SiteContainer& sc, const std::string& name, bool ignoreGap, bool resolveUnknown) { Vint consensus; SimpleSiteContainerIterator ssi(sc); const Site* site; while (ssi.hasMoreSites()) { site = ssi.nextSite(); map freq; SiteTools::getFrequencies(*site, freq, resolveUnknown); double max = 0; int cons = -1; // default result if (ignoreGap) { for (map::iterator it = freq.begin(); it != freq.end(); it++) { if (it->second > max && it->first != -1) { max = it->second; cons = it->first; } } } else { for (map::iterator it = freq.begin(); it != freq.end(); it++) { if (it->second > max) { max = it->second; cons = it->first; } } } consensus.push_back(cons); } const Sequence* seqConsensus = new BasicSequence(name, consensus, sc.getAlphabet()); return seqConsensus; } /******************************************************************************/ void SiteContainerTools::changeGapsToUnknownCharacters(SiteContainer& sites) { // NB: use iterators for a better algorithm? int unknownCode = sites.getAlphabet()->getUnknownCharacterCode(); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { for (unsigned int j = 0; j < sites.getNumberOfSequences(); j++) { int* element = &sites(j, i); if (sites.getAlphabet()->isGap(*element)) *element = unknownCode; } } } /******************************************************************************/ void SiteContainerTools::changeUnresolvedCharactersToGaps(SiteContainer& sites) { // NB: use iterators for a better algorithm? 
int gapCode = sites.getAlphabet()->getGapCharacterCode(); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { for (unsigned int j = 0; j < sites.getNumberOfSequences(); j++) { int* element = &sites(j, i); if (sites.getAlphabet()->isUnresolved(*element)) *element = gapCode; } } } /******************************************************************************/ SiteContainer* SiteContainerTools::removeGapOnlySites(const SiteContainer& sites) { vector seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { const Site* site = &sites.getSite(i); if (!SiteTools::isGapOnly(*site)) noGapCont->addSite(*site); } return noGapCont; } /******************************************************************************/ void SiteContainerTools::removeGapOnlySites(SiteContainer& sites) { size_t n = sites.getNumberOfSites(); size_t i = n; while (i > 1) { ApplicationTools::displayGauge(n - i + 1, n); const Site* site = &sites.getSite(i - 1); if (SiteTools::isGapOnly(*site)) { size_t end = i; while (SiteTools::isGapOnly(*site) && i > 1) { --i; site = &sites.getSite(i - 1); } sites.deleteSites(i, end - i); } else { --i; } } ApplicationTools::displayGauge(n, n); const Site* site = &sites.getSite(0); if (SiteTools::isGapOnly(*site)) sites.deleteSite(0); } /******************************************************************************/ SiteContainer* SiteContainerTools::removeGapOrUnresolvedOnlySites(const SiteContainer& sites) { vector seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { const Site* site = &sites.getSite(i); if (!SiteTools::isGapOrUnresolvedOnly(*site)) noGapCont->addSite(*site, false); } return 
noGapCont; } /******************************************************************************/ void SiteContainerTools::removeGapOrUnresolvedOnlySites(SiteContainer& sites) { size_t n = sites.getNumberOfSites(); size_t i = n; while (i > 1) { ApplicationTools::displayGauge(n - i + 1, n); const Site* site = &sites.getSite(i - 1); if (SiteTools::isGapOnly(*site)) { size_t end = i; while (SiteTools::isGapOrUnresolvedOnly(*site) && i > 1) { --i; site = &sites.getSite(i - 1); } sites.deleteSites(i, end - i); } else { --i; } } ApplicationTools::displayGauge(n, n); const Site* site = &sites.getSite(0); if (SiteTools::isGapOrUnresolvedOnly(*site)) sites.deleteSite(0); } /******************************************************************************/ SiteContainer* SiteContainerTools::removeGapSites(const SiteContainer& sites, double maxFreqGaps) { vector seqNames = sites.getSequencesNames(); VectorSiteContainer* noGapCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < sites.getNumberOfSites(); ++i) { map freq; SiteTools::getFrequencies(sites.getSite(i), freq); if (freq[-1] <= maxFreqGaps) noGapCont->addSite(sites.getSite(i), false); } return noGapCont; } /******************************************************************************/ void SiteContainerTools::removeGapSites(SiteContainer& sites, double maxFreqGaps) { for (size_t i = sites.getNumberOfSites(); i > 0; i--) { map freq; SiteTools::getFrequencies(sites.getSite(i - 1), freq); if (freq[-1] > maxFreqGaps) sites.deleteSite(i - 1); } } /******************************************************************************/ SiteContainer* SiteContainerTools::removeStopCodonSites(const SiteContainer& sites) throw (AlphabetException) { const CodonAlphabet* pca = dynamic_cast(sites.getAlphabet()); if (!pca) throw AlphabetException("Not a Codon Alphabet", sites.getAlphabet()); vector seqNames = sites.getSequencesNames(); 
VectorSiteContainer* noStopCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet()); noStopCont->setSequencesNames(seqNames, false); for (unsigned int i = 0; i < sites.getNumberOfSites(); i++) { const Site* site = &sites.getSite(i); if (!SiteTools::hasStopCodon(*site)) noStopCont->addSite(*site, false); } return noStopCont; } /******************************************************************************/ SiteContainer* SiteContainerTools::resolveDottedAlignment( const SiteContainer& dottedAln, const Alphabet* resolvedAlphabet) throw (AlphabetException, Exception) { if (!AlphabetTools::isDefaultAlphabet(dottedAln.getAlphabet())) throw AlphabetException("SiteContainerTools::resolveDottedAlignment. Alignment alphabet should of class 'DefaultAlphabet'.", dottedAln.getAlphabet()); // First we look for the reference sequence: size_t n = dottedAln.getNumberOfSequences(); if (n == 0) throw Exception("SiteContainerTools::resolveDottedAlignment. Input alignment contains no sequence."); const Sequence* refSeq = 0; for (size_t i = 0; i < n; ++i) // Test each sequence { const Sequence* seq = &dottedAln.getSequence(i); bool isRef = true; for (unsigned int j = 0; isRef && j < seq->size(); ++j) // For each site in the sequence { if (seq->getChar(j) == ".") isRef = false; } if (isRef) // We found the reference sequence! { refSeq = new BasicSequence(*seq); } } if (!refSeq) throw Exception("SiteContainerTools::resolveDottedAlignment. 
No reference sequence was found in the input alignment."); // Now we build a new VectorSiteContainer: VectorSiteContainer* sites = new VectorSiteContainer(n, resolvedAlphabet); // We add each site one by one: size_t m = dottedAln.getNumberOfSites(); string state; for (unsigned int i = 0; i < m; ++i) { string resolved = refSeq->getChar(i); const Site* site = &dottedAln.getSite(i); Site resolvedSite(resolvedAlphabet, site->getPosition()); for (unsigned int j = 0; j < n; j++) { state = site->getChar(j); if (state == ".") { state = resolved; } resolvedSite.addElement(state); } // Add the new site: sites->addSite(resolvedSite); } // Seq sequence names: sites->setSequencesNames(dottedAln.getSequencesNames()); // Delete the copied sequence: delete refSeq; // Return result: return sites; } /******************************************************************************/ std::map SiteContainerTools::getSequencePositions(const Sequence& seq) { map tln; if (seq.size() == 0) return tln; unsigned int count = 0; for (size_t i = 0; i < seq.size(); i++) { if (seq[i] != -1) { count++; tln[i + 1] = count; } } return tln; } /******************************************************************************/ std::map SiteContainerTools::getAlignmentPositions(const Sequence& seq) { map tln; if (seq.size() == 0) return tln; unsigned int count = 0; for (size_t i = 0; i < seq.size(); i++) { if (seq[i] != -1) { count++; tln[count] = i + 1; } } return tln; } /******************************************************************************/ std::map SiteContainerTools::translateAlignment(const Sequence& seq1, const Sequence& seq2) throw (AlphabetMismatchException, Exception) { if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SiteContainerTools::translateAlignment", seq1.getAlphabet(), seq2.getAlphabet()); map tln; if (seq1.size() == 0) return tln; unsigned int count1 = 0; unsigned int count2 = 0; if (seq2.size() == 0) throw 
Exception("SiteContainerTools::translateAlignment. Sequences do not match at position " + TextTools::toString(count1 + 1) + " and " + TextTools::toString(count2 + 1) + "."); int state1 = seq1[count1]; int state2 = seq2[count2]; bool end = false; while (!end) { while (state1 == -1) { count1++; if (count1 < seq1.size()) state1 = seq1[count1]; else break; } while (state2 == -1) { count2++; if (count2 < seq2.size()) state2 = seq2[count2]; else break; } if (state1 != state2) throw Exception("SiteContainerTools::translateAlignment. Sequences do not match at position " + TextTools::toString(count1 + 1) + " and " + TextTools::toString(count2 + 1) + "."); tln[count1 + 1] = count2 + 1; // Count start at 1 if (count1 == seq1.size() - 1) end = true; else { if (count2 == seq2.size() - 1) { state1 = seq1[++count1]; while (state1 == -1) { count1++; if (count1 < seq1.size()) state1 = seq1[count1]; else break; } if (state1 == -1) end = true; else throw Exception("SiteContainerTools::translateAlignment. Sequences do not match at position " + TextTools::toString(count1 + 1) + " and " + TextTools::toString(count2 + 1) + "."); } else { state1 = seq1[++count1]; state2 = seq2[++count2]; } } } return tln; } /******************************************************************************/ std::map SiteContainerTools::translateSequence(const SiteContainer& sequences, size_t i1, size_t i2) { const Sequence* seq1 = &sequences.getSequence(i1); const Sequence* seq2 = &sequences.getSequence(i2); map tln; size_t count1 = 0; // Sequence 1 counter size_t count2 = 0; // Sequence 2 counter int state1; int state2; for (size_t i = 0; i < sequences.getNumberOfSites(); i++) { state1 = (*seq1)[i]; if (state1 != -1) count1++; state2 = (*seq2)[i]; if (state2 != -1) count2++; if (state1 != -1) { tln[count1] = (state2 == -1 ? 
0 : count2);
    }
  }
  return tln;
}

/******************************************************************************/

/**
 * Global pairwise alignment by dynamic programming with a single (linear)
 * gap penalty.
 *
 * Both inputs are cloned and stripped of gaps before being aligned.
 *
 * @param seq1 First sequence.
 * @param seq2 Second sequence.
 * @param s    Substitution scores, read through s.getIndex(state1, state2).
 * @param gap  Score added for every gap position.
 * @throw AlphabetMismatchException If the alphabet types of seq1, seq2 and s
 *                                  are not all the same.
 *
 * NOTE(review): if AlphabetIndex2 is a class template in this release, its
 * argument (presumably <double>) was lost in extraction -- confirm against
 * SiteContainerTools.h.
 */
AlignedSequenceContainer* SiteContainerTools::alignNW(
  const Sequence& seq1,
  const Sequence& seq2,
  const AlphabetIndex2& s,
  double gap)
throw (AlphabetMismatchException)
{
  if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("SiteContainerTools::alignNW", seq1.getAlphabet(), seq2.getAlphabet());
  if (seq1.getAlphabet()->getAlphabetType() != s.getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("SiteContainerTools::alignNW", seq1.getAlphabet(), s.getAlphabet());
  // Check that sequences have no gap!
  auto_ptr<Sequence> s1(seq1.clone());
  SequenceTools::removeGaps(*s1);
  auto_ptr<Sequence> s2(seq2.clone());
  SequenceTools::removeGaps(*s2);

  // 1) Initialize matrix:
  // m(i, j): best score for the first i states of s1 and first j states of s2.
  // p(i-1, j-1): move that produced m(i, j): 'd'iagonal, 'u'p or 'l'eft.
  RowMatrix<double> m(s1->size() + 1, s2->size() + 1);
  RowMatrix<char>   p(s1->size(), s2->size());
  double choice1, choice2, choice3, mx;
  char px;
  for (size_t i = 0; i <= s1->size(); i++)
  {
    m(i, 0) = static_cast<double>(i) * gap;
  }
  for (size_t j = 0; j <= s2->size(); j++)
  {
    m(0, j) = static_cast<double>(j) * gap;
  }
  for (size_t i = 1; i <= s1->size(); i++)
  {
    for (size_t j = 1; j <= s2->size(); j++)
    {
      choice1 = m(i - 1, j - 1) + static_cast<double>(s.getIndex((*s1)[i - 1], (*s2)[j - 1]));
      choice2 = m(i - 1, j) + gap;
      choice3 = m(i, j - 1) + gap;
      mx = choice1;
      px = 'd'; // Default in case of equality of scores.
if (choice2 > mx) { mx = choice2; px = 'u'; } if (choice3 > mx) { mx = choice3; px = 'l'; } m(i, j) = mx; p(i - 1, j - 1) = px; } } // 2) Get alignment: deque a1, a2; size_t i = s1->size(), j = s2->size(); char c; while (i > 0 && j > 0) { c = p(i - 1, j - 1); if (c == 'd') { a1.push_front((*s1)[i - 1]); a2.push_front((*s2)[j - 1]); i--; j--; } else if (c == 'u') { a1.push_front((*s1)[i - 1]); a2.push_front(-1); i--; } else { a1.push_front(-1); a2.push_front((*s2)[j - 1]); j--; } } while (i > 0) { a1.push_front((*s1)[i - 1]); a2.push_front(-1); i--; } while (j > 0) { a1.push_front(-1); a2.push_front((*s2)[j - 1]); j--; } s1->setContent(vector(a1.begin(), a1.end())); s2->setContent(vector(a2.begin(), a2.end())); AlignedSequenceContainer* asc = new AlignedSequenceContainer(s1->getAlphabet()); asc->addSequence(*s1, false); asc->addSequence(*s2, false); // Do not check for sequence names. return asc; } /******************************************************************************/ AlignedSequenceContainer* SiteContainerTools::alignNW( const Sequence& seq1, const Sequence& seq2, const AlphabetIndex2& s, double opening, double extending) throw (AlphabetMismatchException) { if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SiteContainerTools::alignNW", seq1.getAlphabet(), seq2.getAlphabet()); if (seq1.getAlphabet()->getAlphabetType() != s.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SiteContainerTools::alignNW", seq1.getAlphabet(), s.getAlphabet()); // Check that sequences have no gap! 
auto_ptr s1(seq1.clone()); SequenceTools::removeGaps(*s1); auto_ptr s2(seq2.clone()); SequenceTools::removeGaps(*s2); // 1) Initialize matrix: RowMatrix m(s1->size() + 1, s2->size() + 1); RowMatrix v(s1->size() + 1, s2->size() + 1); RowMatrix h(s1->size() + 1, s2->size() + 1); RowMatrix p(s1->size(), s2->size()); double choice1, choice2, choice3, mx; char px; m(0, 0) = 0.; for (size_t i = 0; i <= s1->size(); i++) { v(i, 0) = log(0.); } for (size_t j = 0; j <= s2->size(); j++) { h(0, j) = log(0.); } for (size_t i = 1; i <= s1->size(); i++) { m(i, 0) = h(i, 0) = opening + static_cast(i) * extending; } for (size_t j = 1; j <= s2->size(); j++) { m(0, j) = v(0, j) = opening + static_cast(j) * extending; } for (size_t i = 1; i <= s1->size(); i++) { for (size_t j = 1; j <= s2->size(); j++) { choice1 = m(i - 1, j - 1) + s.getIndex((*s1)[i - 1], (*s2)[j - 1]); choice2 = h(i - 1, j - 1) + opening + extending; choice3 = v(i - 1, j - 1) + opening + extending; mx = choice1; // Default in case of equality of scores. if (choice2 > mx) { mx = choice2; } if (choice3 > mx) { mx = choice3; } m(i, j) = mx; choice1 = m(i, j - 1) + opening + extending; choice2 = h(i, j - 1) + extending; mx = choice1; // Default in case of equality of scores. if (choice2 > mx) { mx = choice2; } h(i, j) = mx; choice1 = m(i - 1, j) + opening + extending; choice2 = v(i - 1, j) + extending; mx = choice1; // Default in case of equality of scores. 
if (choice2 > mx) { mx = choice2; } v(i, j) = mx; px = 'd'; if (v(i, j) > m(i, j)) px = 'u'; if (h(i, j) > m(i, j)) px = 'l'; p(i - 1, j - 1) = px; } } // 2) Get alignment: deque a1, a2; size_t i = s1->size(), j = s2->size(); char c; while (i > 0 && j > 0) { c = p(i - 1, j - 1); if (c == 'd') { a1.push_front((*s1)[i - 1]); a2.push_front((*s2)[j - 1]); i--; j--; } else if (c == 'u') { a1.push_front((*s1)[i - 1]); a2.push_front(-1); i--; } else { a1.push_front(-1); a2.push_front((*s2)[j - 1]); j--; } } while (i > 0) { a1.push_front((*s1)[i - 1]); a2.push_front(-1); i--; } while (j > 0) { a1.push_front(-1); a2.push_front((*s2)[j - 1]); j--; } s1->setContent(vector(a1.begin(), a1.end())); s2->setContent(vector(a2.begin(), a2.end())); AlignedSequenceContainer* asc = new AlignedSequenceContainer(s1->getAlphabet()); asc->addSequence(*s1, false); asc->addSequence(*s2, false); // Do not check for sequence names. return asc; } /******************************************************************************/ VectorSiteContainer* SiteContainerTools::sampleSites(const SiteContainer& sites, size_t nbSites, vector* index) { VectorSiteContainer* sample = new VectorSiteContainer(sites.getSequencesNames(), sites.getAlphabet()); for (size_t i = 0; i < nbSites; i++) { size_t pos = static_cast(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast(sites.getNumberOfSites()))); sample->addSite(sites.getSite(pos), false); if (index) index->push_back(pos); } return sample; } /******************************************************************************/ VectorSiteContainer* SiteContainerTools::bootstrapSites(const SiteContainer& sites) { return sampleSites(sites, sites.getNumberOfSites()); } /******************************************************************************/ const string SiteContainerTools::SIMILARITY_ALL = "all sites"; const string SiteContainerTools::SIMILARITY_NOFULLGAP = "no full gap"; const string SiteContainerTools::SIMILARITY_NODOUBLEGAP = "no double gap"; 
const string SiteContainerTools::SIMILARITY_NOGAP = "no gap"; /******************************************************************************/ double SiteContainerTools::computeSimilarity(const Sequence& seq1, const Sequence& seq2, bool dist, const std::string& gapOption, bool unresolvedAsGap) throw (SequenceNotAlignedException, AlphabetMismatchException, Exception) { if (seq1.size() != seq2.size()) throw SequenceNotAlignedException("SiteContainerTools::computeSimilarity.", &seq2); if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SiteContainerTools::computeSimilarity.", seq1.getAlphabet(), seq2.getAlphabet()); const Alphabet* alpha = seq1.getAlphabet(); unsigned int s = 0; unsigned int t = 0; for (size_t i = 0; i < seq1.size(); i++) { int x = seq1[i]; int y = seq2[i]; int gapCode = alpha->getGapCharacterCode(); if (unresolvedAsGap) { if (alpha->isUnresolved(x)) x = gapCode; if (alpha->isUnresolved(y)) y = gapCode; } if (gapOption == SIMILARITY_ALL) { t++; if (x == y && !alpha->isGap(x) && !alpha->isGap(y)) s++; } else if (gapOption == SIMILARITY_NODOUBLEGAP) { if (!alpha->isGap(x) || !alpha->isGap(y)) { t++; if (x == y) s++; } } else if (gapOption == SIMILARITY_NOGAP) { if (!alpha->isGap(x) && !alpha->isGap(y)) { t++; if (x == y) s++; } } else throw Exception("SiteContainerTools::computeSimilarity. Invalid gap option: " + gapOption); } double r = (t == 0 ? 0. : static_cast(s) / static_cast(t)); return dist ? 
1 - r : r; } /******************************************************************************/ DistanceMatrix* SiteContainerTools::computeSimilarityMatrix(const SiteContainer& sites, bool dist, const std::string& gapOption, bool unresolvedAsGap) { size_t n = sites.getNumberOfSequences(); DistanceMatrix* mat = new DistanceMatrix(sites.getSequencesNames()); string pairwiseGapOption = gapOption; SiteContainer* sites2; if (gapOption == SIMILARITY_NOFULLGAP) { if (unresolvedAsGap) { SiteContainer* tmp = removeGapOrUnresolvedOnlySites(sites); sites2 = new AlignedSequenceContainer(*tmp); delete tmp; } else { SiteContainer* tmp = removeGapOnlySites(sites); sites2 = new AlignedSequenceContainer(*tmp); delete tmp; } pairwiseGapOption = SIMILARITY_ALL; } else { sites2 = new AlignedSequenceContainer(sites); } for (size_t i = 0; i < n; i++) { (*mat)(i, i) = dist ? 0. : 1.; const Sequence* seq1 = &sites2->getSequence(i); for (size_t j = i + 1; j < n; j++) { const Sequence* seq2 = &sites2->getSequence(j); (*mat)(i, j) = (*mat)(j, i) = computeSimilarity(*seq1, *seq2, dist, pairwiseGapOption, unresolvedAsGap); } } delete sites2; return mat; } /******************************************************************************/ void SiteContainerTools::merge(SiteContainer& seqCont1, const SiteContainer& seqCont2, bool leavePositionAsIs) throw (AlphabetMismatchException, Exception) { if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SiteContainerTools::merge.", seqCont1.getAlphabet(), seqCont2.getAlphabet()); vector seqNames1 = seqCont1.getSequencesNames(); vector seqNames2 = seqCont2.getSequencesNames(); const SiteContainer* seqCont2bis = 0; bool del = false; if (seqNames1 == seqNames2) { seqCont2bis = &seqCont2; } else { // We shall reorder sequences first: SiteContainer* seqCont2ter = new VectorSiteContainer(seqCont2.getAlphabet()); SequenceContainerTools::getSelectedSequences(seqCont2, seqNames1, *seqCont2ter); 
seqCont2bis = seqCont2ter; del = true; } if (leavePositionAsIs) { for (size_t i = 0; i < seqCont2bis->getNumberOfSites(); i++) { seqCont1.addSite(seqCont2bis->getSite(i), false); } } else { int offset = static_cast(seqCont1.getNumberOfSites()); for (size_t i = 0; i < seqCont2bis->getNumberOfSites(); i++) { seqCont1.addSite(seqCont2bis->getSite(i), offset + seqCont2bis->getSite(i).getPosition(), false); } } if (del) delete seqCont2bis; } /******************************************************************************/ void SiteContainerTools::getSequencePositions(const SiteContainer& sites, Matrix& positions) { positions.resize(sites.getNumberOfSequences(), sites.getNumberOfSites()); int gap = sites.getAlphabet()->getGapCharacterCode(); for (size_t i = 0; i < sites.getNumberOfSequences(); ++i) { const Sequence& seq = sites.getSequence(i); unsigned int pos = 0; for (size_t j = 0; j < sites.getNumberOfSites(); ++j) { if (seq[j] != gap) { ++pos; positions(i, j) = pos; } else { positions(i, j) = 0; } } } } /******************************************************************************/ vector SiteContainerTools::getColumnScores(const Matrix& positions1, const Matrix& positions2, int na) { if (positions1.getNumberOfRows() != positions2.getNumberOfRows()) throw Exception("SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!"); vector scores(positions1.getNumberOfColumns()); for (size_t i = 0; i < positions1.getNumberOfColumns(); ++i) { //Find an anchor point: size_t whichSeq = 0; size_t whichPos = 0; for (size_t j = 0; j < positions1.getNumberOfRows(); ++j) { if (positions1(j, i) > 0) { whichSeq = j; whichPos = positions1(j, i); break; } } if (whichPos == 0) { //No anchor found, this alignment column is only made of gaps. We assign a score of 'na' and move to the next column. 
scores[i] = na; continue; } //We look for the anchor in the reference alignment: size_t i2 = 0; bool found = false; for (size_t j = 0; !found && j < positions2.getNumberOfColumns(); ++j) { if (positions2(whichSeq, j) == whichPos) { i2 = j; found = true; } } if (!found) { throw Exception("SiteContainerTools::getColumnScores(). Position " + TextTools::toString(whichPos) + " of sequence " + TextTools::toString(whichSeq) + " not found in reference alignment. Please make sure the two indexes are built from the same data!"); } //Now we compare all pairs of sequences between the two positions: bool test = true; for (size_t j = 0; test && j < positions1.getNumberOfRows(); ++j) { test = (positions1(j, i) == positions2(j, i2)); } scores[i] = test ? 1 : 0; } return scores; } /******************************************************************************/ vector SiteContainerTools::getSumOfPairsScores(const Matrix& positions1, const Matrix& positions2, double na) { if (positions1.getNumberOfRows() != positions2.getNumberOfRows()) throw Exception("SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!"); vector scores(positions1.getNumberOfColumns()); for (size_t i = 0; i < positions1.getNumberOfColumns(); ++i) { //For all positions in alignment 1... size_t countAlignable = 0; size_t countAligned = 0; for (size_t j = 0; j < positions1.getNumberOfRows(); ++j) { //Get the corresponding column in alignment 2: size_t whichPos = positions1(j, i); if (whichPos == 0) { //No position for this sequence here. continue; } //We look for the position in the second alignment: size_t i2 = 0; bool found = false; for (size_t k = 0; !found && k < positions2.getNumberOfColumns(); ++k) { if (positions2(j, k) == whichPos) { i2 = k; found = true; } } if (!found) { throw Exception("SiteContainerTools::getColumnScores(). Position " + TextTools::toString(whichPos) + " of sequence " + TextTools::toString(j) + " not found in reference alignment. 
Please make sure the two indexes are built from the same data!"); } //Now we check all other positions and see if they are aligned with this one: for (size_t k = j + 1; k < positions1.getNumberOfRows(); ++k) { size_t whichPos2 = positions1(k, i); if (whichPos2 == 0) { //Empty position continue; } countAlignable++; //check position in alignment 2: if (positions2(k, i2) == whichPos2) countAligned++; } } scores[i] = countAlignable == 0 ? na : static_cast(countAligned) / static_cast(countAlignable); } return scores; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/AlignedSequenceContainer.h000644 000000 000000 00000016451 12147656566 024122 0ustar00rootroot000000 000000 // // File AlignedSequenceContainer.h // Created by: Guillaume Deuchst // // Julien Dutheil // Last modification : Friday August 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALIGNEDSEQUENCECONTAINER_H_ #define _ALIGNEDSEQUENCECONTAINER_H_ #include "../Site.h" #include "SiteContainer.h" #include "OrderedSequenceContainer.h" #include "VectorSequenceContainer.h" #include // From the STL: #include namespace bpp { /** * @brief Aligned sequences container. * * This class inherits from the VectorSequenceContainer and add site access. * Sequence addition methods are re-defined to check for sequence lengths. * Sequence access is in \f$O(1)\f$, and site access in \f$O(n)\f$, where * \f$n\f$ is the number of sequences in the container. * * See VectorSiteContainer for an alternative implementation. * * @see VectorSequenceContainer, Sequence, Site, VectorSiteContainer */ class AlignedSequenceContainer: public virtual VectorSequenceContainer, public virtual SiteContainer { private: // Integer std::vector that contains sites's positions std::vector positions_; size_t length_; // Number of sites for verifications before sequence's insertion in sequence container /** * This is used in order to implement the SiteContainer interface. 
* A SiteContainer is expected to work on Site objects, but this class * -- since it is a VectorSequenceContainer -- has its data sored as * Sequence object. When the SiteContainer method getSite() is invoked * it creates a new Site object and send the address of it. * To avoid memory leaks, this object is put into a std::vector so that it can be * destroyed when the container is destroyed. */ mutable std::vector sites_; public: /** * @brief Build a new empty container with the specified alphabet. * * @param alpha The alphabet to use. */ AlignedSequenceContainer(const Alphabet* alpha): VectorSequenceContainer(alpha), positions_(), length_(0), sites_() { reindexSites(); } /** * @brief Copy constructor. * * @param asc The container to copy. */ AlignedSequenceContainer(const AlignedSequenceContainer& asc): VectorSequenceContainer(asc), positions_(asc.getSitePositions()), length_(asc.getNumberOfSites()), sites_(asc.getNumberOfSites()) {} /** * @brief Convert any SiteContainer object into a AlignedSequenceContainer object. * * @param sc The container to copy. */ AlignedSequenceContainer(const SiteContainer& sc): VectorSequenceContainer(sc), positions_(sc.getSitePositions()), length_(sc.getNumberOfSites()), sites_(sc.getNumberOfSites()) {} /** * @brief Try to coerce an OrderedSequenceContainer object into an AlignedSequenceContainer object. * * Sequences in osc will be considered alligned, and have the same number of sites. * * @param osc The ordered container to coerce. * @throw SequenceNotAlignedException If sequences in osc do not have the same length. 
*/ AlignedSequenceContainer(const OrderedSequenceContainer& osc) throw (SequenceNotAlignedException); AlignedSequenceContainer& operator=(const AlignedSequenceContainer& asc); AlignedSequenceContainer& operator=(const SiteContainer& sc); AlignedSequenceContainer& operator=(const OrderedSequenceContainer& osc) throw (SequenceNotAlignedException); virtual ~AlignedSequenceContainer(); public: /** * @name The Clonable interface. * * @{ */ AlignedSequenceContainer* clone() const { return new AlignedSequenceContainer(*this); } /** @} */ /** * @name The SiteContainer interface implementation: * * @{ */ const Site& getSite(size_t siteIndex) const throw (IndexOutOfBoundsException); void setSite(size_t siteIndex, const Site& site, bool checkPosition = true) throw (Exception); Site * removeSite(size_t siteIndex) throw (IndexOutOfBoundsException); void deleteSite(size_t siteIndex) throw (IndexOutOfBoundsException); void deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException, Exception); void addSite(const Site& site, bool checkPosition = true) throw (Exception); void addSite(const Site& site, int position, bool checkPosition = true) throw (Exception); void addSite(const Site& site, size_t siteIndex, bool checkPosition = true) throw (Exception); void addSite(const Site& site, size_t siteIndex, int position, bool checkPosition = true) throw (Exception); size_t getNumberOfSites() const { return length_; } Vint getSitePositions() const { return positions_; } void reindexSites(); void clear(); AlignedSequenceContainer* createEmptyContainer() const; /** @} */ /** * @name Redefinition of VectorSequenceContainer methods, to check for sequence lengths. 
* * @{ */ void setSequence(const std::string& name, const Sequence& sequence, bool checkName = true) throw (Exception); void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName = true) throw (Exception); void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception); void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception); /** @} */ protected: /** * @brief Check sequence's size before insertion in sequence container. * * @param sequence The sequence to check. * @return True if sequence length = number of sites in container. */ bool checkSize_(const Sequence& sequence) { return (sequence.size() == length_); } }; } //end of namespace bpp. #endif // _ALIGNEDSEQUENCECONTAINER_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/AlignedSequenceContainer.cpp000644 000000 000000 00000041174 12147656566 024455 0ustar00rootroot000000 000000 // // File: AlignedSequenceContainer.cpp // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Friday August 22 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "AlignedSequenceContainer.h" #include using namespace bpp; // From the STL: #include using namespace std; /***************************************************************************/ AlignedSequenceContainer::AlignedSequenceContainer(const OrderedSequenceContainer& osc) throw (SequenceNotAlignedException) : VectorSequenceContainer(osc.getAlphabet()), // We can't call the copy constructor because we want to use the overloaded addSequence method !!! 
positions_(), length_(), sites_() { // Initializing for (unsigned int i = 0; i < osc.getNumberOfSequences(); i++) { addSequence(osc.getSequence(i), true); } if (osc.getNumberOfSequences() > 0) length_ = getSequence(0).size(); // the overloaded else length_ = 0; reindexSites(); sites_.resize(length_); setGeneralComments(osc.getGeneralComments()); } /***************************************************************************/ AlignedSequenceContainer& AlignedSequenceContainer::operator=(const AlignedSequenceContainer& asc) { VectorSequenceContainer::operator=(asc); // Initializing length_ = asc.getNumberOfSites(); positions_ = asc.getSitePositions(); sites_.resize(length_); return *this; } /***************************************************************************/ AlignedSequenceContainer& AlignedSequenceContainer::operator=(const SiteContainer& sc) { VectorSequenceContainer::operator=(sc); // Initializing length_ = sc.getNumberOfSites(); positions_ = sc.getSitePositions(); sites_.resize(length_); return *this; } /***************************************************************************/ AlignedSequenceContainer& AlignedSequenceContainer::operator=(const OrderedSequenceContainer& osc) throw (SequenceNotAlignedException) { VectorSequenceContainer::operator=(osc); // Initializing length_ = 0; reindexSites(); sites_.resize(length_); return *this; } /** Class destructor: *********************************************************/ AlignedSequenceContainer::~AlignedSequenceContainer() { // delete all sites: for (unsigned int i = 0; i < sites_.size(); i++) { if (sites_[i]) delete sites_[i]; } } /***************************************************************************/ const Site& AlignedSequenceContainer::getSite(size_t i) const throw (IndexOutOfBoundsException) { if (i >= length_) throw IndexOutOfBoundsException("AlignedSequenceContainer::getSite", i, 0, getNumberOfSites() - 1); // Main loop : for all sequences size_t n = getNumberOfSequences(); std::vector site(n); 
for (size_t j = 0; j < n; j++) { site[j] = getSequence(j)[i]; } if (sites_[i]) delete sites_[i]; sites_[i] = new Site(site, getAlphabet(), positions_[i]); return *sites_[i]; } /******************************************************************************/ void AlignedSequenceContainer::setSite(size_t pos, const Site& site, bool checkPositions) throw (Exception) { // New site's alphabet and site container's alphabet matching verification if (pos >= getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::setSite", pos, 0, getNumberOfSites() - 1); if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AlignedSequenceContainer::setSite", getAlphabet(), site.getAlphabet()); std::vector s = site.getContent(); // Check size: if (s.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::setSite, site does not have the appropriate length", &site); // Check position: int position = site.getPosition(); if (checkPositions) { // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < positions_.size(); i++) { if (positions_[i] == position) throw SiteException("AlignedSequenceContainer::setSite: Site position already exists in container", &site); } } // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).setElement(pos, s[j]); } positions_[pos] = site.getPosition(); } /******************************************************************************/ Site* AlignedSequenceContainer::removeSite(size_t pos) throw (IndexOutOfBoundsException) { if (pos >= getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::removeSite", pos, 0, getNumberOfSites() - 1); // Get old site getSite(pos); // Creates the site! 
Site* old = sites_[pos]; // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).deleteElement(pos); } // Delete site's position positions_.erase(positions_.begin() + pos); length_--; // Actualizes the 'sites' vector: if (sites_[pos]) delete sites_[pos]; sites_.erase(sites_.begin() + pos); // Send result return old; } /******************************************************************************/ void AlignedSequenceContainer::deleteSite(size_t pos) throw (IndexOutOfBoundsException) { if (pos >= getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSite", pos, 0, getNumberOfSites() - 1); // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).deleteElement(pos); } // Delete site's position positions_.erase(positions_.begin() + pos); length_--; // Actualizes the 'sites' vector: if (sites_[pos]) delete sites_[pos]; sites_.erase(sites_.begin() + pos); } /******************************************************************************/ void AlignedSequenceContainer::deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException, Exception) { if (siteIndex + length > getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSites", siteIndex + length, 0, getNumberOfSites() - 1); // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).deleteElements(siteIndex, length); } // Delete site's siteIndexition positions_.erase(positions_.begin() + siteIndex, positions_.begin() + siteIndex + length); length_ -= length; // Actualizes the 'sites' vector: for (size_t i = siteIndex; i < siteIndex + length; ++i) { if (sites_[i]) delete sites_[i]; } sites_.erase(sites_.begin() + siteIndex, sites_.begin() + siteIndex + length); } /******************************************************************************/ void AlignedSequenceContainer::addSite(const Site& site, bool checkPositions) throw (Exception) { // New 
site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AlignedSequenceContainer::addSite"); // Initializing std::vector s = site.getContent(); // Check size: if (s.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site); // Check position: int position = site.getPosition(); if (checkPositions) { // For all positions in vector : throw exception if position already exists for (unsigned int i = 0; i < positions_.size(); i++) { if (positions_[i] == position) throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site); } } // For all sequences for (unsigned int j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).addElement(s[j]); } length_++; positions_.push_back(position); // Actualizes the 'sites' vector: sites_.push_back(0); } /******************************************************************************/ void AlignedSequenceContainer::addSite(const Site& site, int position, bool checkPositions) throw (Exception) { // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AlignedSequenceContainer::addSite"); // Initializing std::vector s = site.getContent(); // Check size: if (s.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site); // Check position: if (checkPositions) { // For all positions in vector : throw exception if position already exists for (unsigned int i = 0; i < positions_.size(); i++) { if (positions_[i] == position) throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site); } } // For all sequences for (unsigned int j = 0; j < 
getNumberOfSequences(); j++) { getSequence_(j).addElement(s[j]); } length_++; positions_.push_back(position); // Actualizes the 'sites' vector: sites_.push_back(0); } /******************************************************************************/ void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, bool checkPositions) throw (Exception) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites() - 1); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet()); std::vector s = site.getContent(); // Check size: if (s.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site); // Check position: int position = site.getPosition(); if (checkPositions) { // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < positions_.size(); i++) { if (positions_[i] == position) throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site); } } // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).addElement(siteIndex, site[j]); } length_++; positions_.insert(positions_.begin() + siteIndex, position); // Actualizes the 'sites' vector: sites_.insert(sites_.begin() + siteIndex, 0); } /******************************************************************************/ void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, int position, bool checkPositions) throw (Exception) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites() - 1); // New site's alphabet and site container's alphabet 
matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet()); std::vector s = site.getContent(); // Check size: if (s.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site); // Check position: if (checkPositions) { // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < positions_.size(); i++) { if (positions_[i] == position) throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site); } } // For all sequences for (size_t j = 0; j < getNumberOfSequences(); j++) { getSequence_(j).addElement(siteIndex, site[j]); } length_++; positions_.insert(positions_.begin() + siteIndex, position); // Actualizes the 'sites' vector: sites_.insert(sites_.begin() + siteIndex, 0); } /******************************************************************************/ void AlignedSequenceContainer::reindexSites() { positions_.resize(length_); for (size_t i = 0; i < length_; i++) { positions_[i] = static_cast(i + 1); // start with 1. 
} } /******************************************************************************/ void AlignedSequenceContainer::setSequence(size_t i, const Sequence& sequence, bool checkName) throw (Exception) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("AlignedSequenceContainer::setSequence", i, 0, getNumberOfSequences() - 1); // if container has only one sequence if (getNumberOfSequences() == 1) length_ = sequence.size(); if (checkSize_(sequence)) VectorSequenceContainer::setSequence(i, sequence, checkName); else throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence); } /******************************************************************************/ void AlignedSequenceContainer::setSequence(const string& name, const Sequence& sequence, bool checkName) throw (Exception) { // if container has only one sequence if (getNumberOfSequences() == 1) length_ = sequence.size(); if (checkSize_(sequence)) VectorSequenceContainer::setSequence(name, sequence, checkName); else throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence); } /******************************************************************************/ void AlignedSequenceContainer::addSequence(const Sequence& sequence, bool checkName) throw (Exception) { // if container has only one sequence if (length_ == 0) { length_ = sequence.size(); sites_.resize(length_); reindexSites(); } if (checkSize_(sequence)) VectorSequenceContainer::addSequence(sequence, checkName); else throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence); } /******************************************************************************/ void AlignedSequenceContainer::addSequence(const Sequence& sequence, size_t i, bool checkName) throw (Exception) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("AlignedSequenceContainer::addSequence", i, 0, getNumberOfSequences() - 1); // if container has only one sequence if (length_ == 0) 
length_ = sequence.size(); if (checkSize_(sequence)) VectorSequenceContainer::addSequence(sequence, i, checkName); else throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence); } /******************************************************************************/ void AlignedSequenceContainer::clear() { length_ = 0; VectorSequenceContainer::clear(); } /******************************************************************************/ AlignedSequenceContainer* AlignedSequenceContainer::createEmptyContainer() const { AlignedSequenceContainer* asc = new AlignedSequenceContainer(getAlphabet()); asc->setGeneralComments(getGeneralComments()); return asc; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainerExceptions.h000644 000000 000000 00000010411 12147656566 024666 0ustar00rootroot000000 000000 // // File: SequenceContainerExceptions.h // Created by: Julien Dutheil // Created on: Mon Nov 3 17:00:05 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCECONTAINEREXCEPTIONS_H_ #define _SEQUENCECONTAINEREXCEPTIONS_H_ #include namespace bpp { class SequenceContainer; /** * @brief Exception thrown when a sequence is not found The sequence not found exception base class. */ class SequenceNotFoundException : public Exception { protected: /** * @brief The id of the sequence that was to be found. */ const std::string id; public: /** * @brief Build a new SequenceNotFoundException object. * * @param text A message to be passed to the exception hierarchy. * @param seqId A the id of the sequence that was to be found. */ SequenceNotFoundException(const char * text, const char * seqId = "") : Exception("SequenceNotFoundException: " + std::string(text) + "(" + seqId + ")"), id(seqId) {}; /** * @brief Build a new SequenceNotFoundException object. * * @param text A message to be passed to the exception hierarchy. * @param seqId A the id of the sequence that was to be found. 
*/ SequenceNotFoundException(const std::string & text, const std::string & seqId = "") : Exception("SequenceNotFoundException: " + text + "(" + seqId + ")"), id(seqId) {}; // Class destructor virtual ~SequenceNotFoundException() throw() {} public: /** * @brief Get the id of the sequence that was to be found. * * @return The id of the sequence that was to be found. */ virtual const std::string getSequenceId() const { return id; } }; /** * @brief Exception thrown when an empty container is found. */ class EmptyContainerException : public Exception { private: /** * @brief The empty container. */ const SequenceContainer *container_; public: /** * @brief Build a new EmptyContainerException object. * * @param text A message to be passed to the exception hierarchy. * @param container The empty container. */ EmptyContainerException(const std::string& text, const SequenceContainer* container) : Exception("EmptyContainerException: " + text), container_(container) {}; EmptyContainerException(const EmptyContainerException& ece): Exception(ece), container_(ece.container_) {} EmptyContainerException& operator=(const EmptyContainerException& ece) { Exception::operator=(ece); container_ = ece.container_; return *this; } // Class destructor virtual ~EmptyContainerException() throw() {} public: /** * @return The empty container. */ virtual const SequenceContainer* getContainer() const { return container_; } }; } //end of namespace bpp. #endif //_SEQUENCECONTAINEREXCEPTIONS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerTools.h000644 000000 000000 00000053222 12147656566 023010 0ustar00rootroot000000 000000 // // File: SiteContainerTools.h // Created by: Julien Dutheil // Created on: Fri Dec 12 18:55:06 2003 // #ifndef _SITECONTAINERTOOLS_H_ #define _SITECONTAINERTOOLS_H_ /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SiteContainer.h" #include "VectorSiteContainer.h" #include "AlignedSequenceContainer.h" #include "../AlphabetIndex/AlphabetIndex2.h" #include "../DistanceMatrix.h" #include //From the STL: #include #include namespace bpp { typedef std::vector SiteSelection; /** * @brief Some utililitary methods to deal with site containers. */ class SiteContainerTools { public: SiteContainerTools() {} virtual ~SiteContainerTools() {} public: /** * @brief Retrieves sites without gaps from SiteContainer. * * This function build a new SiteContainer instance with only sites without gaps. 
* The container passed as input is not modified, all sites are copied. * * @param sites The container to analyse. * @return A pointer toward a new SiteContainer with only sites with no gaps. */ static SiteContainer* getSitesWithoutGaps(const SiteContainer& sites); /** * @brief Retrieves complete sites from SiteContainer. * * This function build a new SiteContainer instance with only complete sites, * i.e. site with fully resolved states (no gap, no unknown caracters). * The container passed as input is not modified, all sites are copied. * * @param sites The container to analyse. * @return A pointer toward a new SiteContainer with only complete sites. */ static SiteContainer* getCompleteSites(const SiteContainer& sites); /** * @brief Get a site set without gap-only sites. * * This function build a new SiteContainer instance without sites with only gaps. * The container passed as input is not modified, all sites are copied. * * @see removeGapOnlySites(SiteContainer& sites) * @param sites The container to analyse. * @return A pointer toward a new SiteContainer. */ static SiteContainer* removeGapOnlySites(const SiteContainer& sites); /** * @brief Remove gap-only sites from a site set. * * @param sites The container where the sites have to be removed. */ static void removeGapOnlySites(SiteContainer& sites); /** * @brief Get a site set without gap/unresolved-only sites. * * This function build a new SiteContainer instance without sites with only gaps or unresolved characters. * The container passed as input is not modified, all sites are copied. * * @param sites The container to analyse. * @return A pointer toward a new SiteContainer. */ static SiteContainer* removeGapOrUnresolvedOnlySites(const SiteContainer& sites); /** * @brief Remove gap/unresolved-only sites from a site set. * * @param sites The container where the sites have to be removed. 
*/ static void removeGapOrUnresolvedOnlySites(SiteContainer& sites); /** * @brief Get a siteset with sites with less than a given amount of gaps. * * @param sites The container from which the sites have to be removed. * @param maxFreqGaps The maximum frequency of gaps in each site. * @return A pointer toward a new SiteContainer. */ static SiteContainer* removeGapSites(const SiteContainer& sites, double maxFreqGaps); /** * @brief Remove sites with a given amount of gaps. * * @param sites The container from which the sites have to be removed. * @param maxFreqGaps The maximum frequency of gaps in each site. */ static void removeGapSites(SiteContainer& sites, double maxFreqGaps); /** * @brief Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. * * This function build a new SiteContainer instance without sites that have at least a stop codon. * The container passed as input is not modified, all sites are copied. * * @param sites The container to analyse. * @return A pointer toward a new SiteContainer. */ static SiteContainer* removeStopCodonSites(const SiteContainer& sites) throw (AlphabetException); /** * @brief Create a new container with a specified set of sites. * * A new VectorSiteContainer is created with specified sites. * The destruction of the container is up to the user. * Sites are specified by their indice, beginning at 0. * No position verification is performed, based on the assumption that * the container passed as an argument is a correct one. * Redundant selection is not checked, so be careful with what you're doing! * * @param sequences The container from wich sequences are to be taken. * @param selection The positions of all sites to retrieve. * @return A new container with all selected sites. */ static SiteContainer* getSelectedSites(const SiteContainer& sequences, const SiteSelection& selection); /** * @brief create the consensus sequence of the alignment. 
* * In case of ambiguity (for instance a AATT site), one state will be chosen arbitrarily. * * @param sc a site container * @param name the name of the sequence object that will be created. * @param ignoreGap Tell if gap must be counted or not. If not (true option), only fully gapped sites will result in a gap in the consensus sequence. * @param resolveUnknown Tell is unknnown characters must resolved. In a DNA sequence for instance, N will be counted as A=1/4, T=1/4, G=1/4 and C=1/4. Otherwise it will be counted as N=1. * If this option is set to true, a consensus sequence will never contain an unknown character. * @return A new Sequence object with the consensus sequence. */ static const Sequence* getConsensus(const SiteContainer & sc, const std::string& name = "consensus", bool ignoreGap = true, bool resolveUnknown = false); /** * @brief Change all gaps to unknown state in a container, according to its alphabet. * * For DNA alphabets, this change all '-' to 'N'. * * @param sites The container to be modified. */ static void changeGapsToUnknownCharacters(SiteContainer& sites); /** * @brief Change all unresolved characters to gaps in a container, according to its alphabet. * * For DNA alphabets, this change all 'N', 'M', 'R', etc. to '-'. * * @param sites The container to be modified. */ static void changeUnresolvedCharactersToGaps(SiteContainer& sites); /** * @brief Resolve a container with "." notations. * * @code * ATGCCGTTGG * .C...A..C. * ..A....C.. * @endcode * will results in * @code * ATGCCGTTGG * ACCCCATTCG * ATACCGTCGG * @endcode * for instance. * The first sequence is here called the "reference" sequence. * It need not be the first in the container. * The alphabet of the input alignment must be an instance of the DefaultAlphabet class, the only one which support dot characters. * A new alignment is created and returned, with the specified alphabet. * * If several sequences that may be considered as reference are found, the first one is used. 
* * @param dottedAln The input alignment. * @param resolvedAlphabet The alphabet of the output alignment. * @return A pointer toward a dynamically created SiteContainer with the specified alphabet (can be a DefaultAlphabet). * @throw AlphabetException If the alphabet of the input alignment is not of class DefaultAlphabet, or if one character does not match with the output alphabet. * @throw Exception If no reference sequence was found, or if the input alignment contains no sequence. */ static SiteContainer* resolveDottedAlignment(const SiteContainer& dottedAln, const Alphabet* resolvedAlphabet) throw (AlphabetException, Exception); /** * @name Sequences coordinates. * * @see SequenceWalker For an alternative approach. * @{ */ /** * @brief Get the index of each sequence position in an aligned sequence. * * If the sequence contains no gap, the translated and the original positions are the same. * Position numbers start at 1. * * @param seq The sequence to translate. * @return A map with original sequence positions as keys, and translated positions as values. */ static std::map getSequencePositions(const Sequence& seq); /** * @brief Get the index of each alignment position in an aligned sequence. * * If the sequence contains no gap, the translated and the original positions are the same. * Position numbers start at 1. * * @param seq The sequence to translate. * @return A map with original alignement positions as keys, and translated positions as values. */ static std::map getAlignmentPositions(const Sequence& seq); /** * @brief Fill a numeric matrix with the size of the alignment, containing the each sequence position. * * Positions start at 1, gaps have "position" 0. * * @param sites The input alignment. * @param positions A matrix object which is going to be resized and filled with the corresponding positions. 
* @author Julien Dutheil
     */
    static void getSequencePositions(const SiteContainer& sites, Matrix& positions);
    /** @} */

    /**
     * @brief Translate alignment positions from an aligned sequence to the same sequence in a different alignment.
     *
     * Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2.
     * The two sequences must be the same, except for the gaps.
     * If no sequence contains gaps, or if the gaps are at the same place in both sequences, the translated positions will be the same as the original positions.
     *
     * NOTE(review): the template arguments of the returned std::map are not visible in this
     * copy of the header -- confirm the key/value types against the implementation.
     *
     * @param seq1 The sequence to translate.
     * @param seq2 The reference sequence.
     * @return A map with original alignment positions as keys, and translated positions as values.
     * @throw AlphabetMismatchException If the sequences do not share the same alphabet.
     * @throw Exception If the sequences do not match.
     */
    static std::map translateAlignment(const Sequence& seq1, const Sequence& seq2) throw (AlphabetMismatchException, Exception);

    /**
     * @brief Translate sequence positions from a sequence to another in the same alignment.
     *
     * Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2 at the same site.
     * If no corresponding position is available (i.e. if there is a gap in sequence 2 at the corresponding position), 0 is returned.
     *
     * @param sequences The alignment to use.
     * @param i1 The index of the sequence to translate.
     * @param i2 The index of the reference sequence.
     * @return A map with original sequence positions as keys, and translated positions as values.
     */
    static std::map translateSequence(const SiteContainer& sequences, size_t i1, size_t i2);

    /**
     * @brief Align two sequences using the Needleman-Wunsch dynamic programming algorithm.
     *
     * If the input sequences contain gaps, they will be ignored.
     *
     * @see BLOSUM50, DefaultNucleotideScore for score matrices.
     *
     * @param seq1 The first sequence.
     * @param seq2 The second sequence.
     * @param s The score matrix to use.
     * @param gap Gap penalty.
     * @return A new AlignedSequenceContainer instance.
     * @throw AlphabetMismatchException If the sequences and the score matrix do not share the same alphabet.
     */
    static AlignedSequenceContainer* alignNW(const Sequence& seq1, const Sequence& seq2, const AlphabetIndex2& s, double gap) throw (AlphabetMismatchException);

    /**
     * @brief Align two sequences using the Needleman-Wunsch dynamic programming algorithm,
     * with distinct gap opening and gap extending penalties.
     *
     * If the input sequences contain gaps, they will be ignored.
     *
     * @see BLOSUM50, DefaultNucleotideScore for score matrices.
     *
     * @param seq1 The first sequence.
     * @param seq2 The second sequence.
     * @param s The score matrix to use.
     * @param opening Gap opening penalty.
     * @param extending Gap extending penalty.
     * @return A new AlignedSequenceContainer instance.
     * @throw AlphabetMismatchException If the sequences and the score matrix do not share the same alphabet.
     */
    static AlignedSequenceContainer* alignNW(const Sequence& seq1, const Sequence& seq2, const AlphabetIndex2& s, double opening, double extending) throw (AlphabetMismatchException);

    /**
     * @brief Sample sites in an alignment.
     *
     * Original site positions will be kept. The resulting container will hence probably have duplicated
     * positions. You may wish to call the reindexSites() method on the returned container.
     *
     * Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
     *
     * @param sites An input alignment to sample.
     * @param nbSites The size of the resulting container.
     * @param index [out] If non-null the underlying vector will be appended with the original site indices.
     * @return A sampled alignment with nbSites sites taken from the input one.
     */
    static VectorSiteContainer* sampleSites(const SiteContainer& sites, size_t nbSites, std::vector* index = 0);

    /**
     * @brief Bootstrap sites in an alignment.
     *
     * Original site positions will be kept. The resulting container will hence probably have duplicated
     * positions. You may wish to call the reindexSites() method on the returned container.
     *
     * Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
     *
     * @param sites An input alignment to sample.
     * @return A sampled alignment with the same number of sites as the input one.
     */
    static VectorSiteContainer* bootstrapSites(const SiteContainer& sites);

    /**
     * @brief Compute the similarity/distance score between two aligned sequences.
     *
     * The similarity measures are computed as the proportion of identical matches.
     * The distance between the two sequences is defined as 1 - similarity.
     * This function can be used with any type of alphabet.
     *
     * @param seq1 The first sequence.
     * @param seq2 The second sequence.
     * @param dist Shall we return a distance instead of similarity?
     * @param gapOption How to deal with gaps:
     * - SIMILARITY_ALL: all positions are used.
     * - SIMILARITY_NODOUBLEGAP: ignore all positions with a gap in the two sequences.
     * - SIMILARITY_NOGAP: ignore all positions with a gap in at least one of the two sequences.
     * @param unresolvedAsGap Tell if unresolved characters must be considered as gaps when counting.
     * If set to yes, the gap option will also apply to unresolved characters.
     * @return The proportion of matches between the two sequences.
     * @throw SequenceNotAlignedException If the two sequences do not have the same length.
     * @throw AlphabetMismatchException If the two sequences do not share the same alphabet type.
     * @throw Exception If an invalid gapOption is passed.
     */
    static double computeSimilarity(const Sequence& seq1, const Sequence& seq2, bool dist = false, const std::string& gapOption = SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap = true) throw (SequenceNotAlignedException, AlphabetMismatchException, Exception);

    /**
     * @brief Compute the similarity matrix of an alignment.
     *
     * The similarity measures are computed as the proportion of identical matches.
     * The distance between the two sequences is defined as 1 - similarity.
     * This function can be used with any type of alphabet.
     * Several options concerning gaps and unresolved characters are proposed:
     * - SIMILARITY_ALL: all positions are used.
     * - SIMILARITY_NOFULLGAP: ignore positions with a gap in all the sequences in the alignment.
     * - SIMILARITY_NODOUBLEGAP: ignore all positions with a gap in the two sequences for each pair.
     * - SIMILARITY_NOGAP: ignore all positions with a gap in at least one of the two sequences for each pair.
     *
     *
     * @see computeSimilarity
     *
     * @param sites The input alignment.
     * @param dist Shall we return a distance instead of similarity?
     * @param gapOption How to deal with gaps.
     * @param unresolvedAsGap Tell if unresolved characters must be considered as gaps when counting.
     * If set to yes, the gap option will also apply to unresolved characters.
     * @return All pairwise similarity measures.
     */
    static DistanceMatrix* computeSimilarityMatrix(const SiteContainer& sites, bool dist = false, const std::string& gapOption = SIMILARITY_NOFULLGAP, bool unresolvedAsGap = true);

    // Names of the gap-handling options accepted by computeSimilarity() and
    // computeSimilarityMatrix(); the values are defined outside this header.
    static const std::string SIMILARITY_ALL;
    static const std::string SIMILARITY_NOFULLGAP;
    static const std::string SIMILARITY_NODOUBLEGAP;
    static const std::string SIMILARITY_NOGAP;

    /**
     * @brief Add the content of a site container to an existing one.
     *
     * The input containers are supposed to have unique sequence names.
     * If it is not the case, several things can happen:
     * - If the two containers have exactly the same names in the same order, then the content of the second one will be added as is to the first one.
     * - If the second container does not have exactly the same sequence names or in a different order, then a reordered selection of the second container is created first,
     * and in that case, only the first sequence with a given name will be used and duplicated.
     * In any case, note that the second container should always contain all the sequence names from the first one,
     * otherwise an exception will be thrown.
     *
     * @author Julien Dutheil
     *
     * @param seqCont1 First container.
     * @param seqCont2 Second container. This container must contain sequences with the same names as in seqCont1.
     * Additional sequences will be ignored.
     * @param leavePositionAsIs Tell if site positions should be left unchanged. Otherwise (the default) the size of container 1 is added to the positions in container 2.
     * @throw AlphabetMismatchException If the alphabets in the 2 containers do not match.
     * @throw Exception If sequence names do not match.
     */
    static void merge(SiteContainer& seqCont1, const SiteContainer& seqCont2, bool leavePositionAsIs = false) throw (AlphabetMismatchException, Exception);

    /**
     * @brief Compare an alignment to a reference alignment, and compute the column scores.
     *
     * Calculations are made according to the formula for the "CS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.
     *
     * @param positions1 Alignment index for the test alignment.
     * @param positions2 Alignment index for the reference alignment.
     * @param na The score to use if the tested column is full of gaps.
     * @return A vector of scores, as 0 or 1.
     * @see getSequencePositions for creating the alignment indexes.
     * @warning The indexes for the two alignments must have the sequences in the exact same order!
     * @author Julien Dutheil
     */
    static std::vector getColumnScores(const Matrix& positions1, const Matrix& positions2, int na = 0);

    /**
     * @brief Compare an alignment to a reference alignment, and compute the sum-of-pairs scores.
     *
     * Calculations are made according to the formula for the "SPS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.
     *
     * @param positions1 Alignment index for the test alignment.
     * @param positions2 Alignment index for the reference alignment.
     * @param na The score to use if the tested column is not testable, that is, not containing at least two residues.
     * @return A vector of scores, between 0 and 1 (+ na value).
     * @see getSequencePositions for creating the alignment indexes.
     * @warning The indexes for the two alignments must have the sequences in the exact same order!
     * @author Julien Dutheil
     */
    static std::vector getSumOfPairsScores(const Matrix& positions1, const Matrix& positions2, double na = 0);

};

} //end of namespace bpp.

#endif //_SITECONTAINERTOOLS_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainerTools.h000644 000000 000000 00000030710 12147656566 023651 0ustar00rootroot000000 000000
//
// File: SequenceContainerTools.h
// Created by: Julien Dutheil
// Sylvain Gaillard
// Created on: Sat Oct 4 09:18:34 2003
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability
as regards their requirements in conditions enabling the security of their
systems and/or data to be ensured and, more generally, to use and operate
it in the same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _SEQUENCECONTAINERTOOLS_H_
#define _SEQUENCECONTAINERTOOLS_H_

// From the STL:
// NOTE(review): the header names of the four includes below are missing in this
// copy of the file -- restore them from the upstream source before building.
#include
#include
#include
#include

#include "SequenceContainer.h"
#include "OrderedSequenceContainer.h"

namespace bpp
{

// Selection of sequences designated by their position in a container
// (see getSelectedSequences() and keepOnlySelectedSequences() below).
typedef std::vector SequenceSelection;

/**
 * @brief Utility methods dealing with sequence containers.
 */
class SequenceContainerTools
{
  public:
    SequenceContainerTools() {}
    virtual ~SequenceContainerTools() {}

  public:
    /**
     * @brief Create a container with @f$n@f$ empty sequences.
     *
     * A new VectorSequenceContainer with the specified alphabet is created.
     * The destruction of this new container is up to the user.
     * Sequences have name "0", "1",... "n-1" and no content and comments.
     *
     * @param alphabet The alphabet to use in the container.
     * @param size The number of sequences in the container.
     * @return A pointer toward a newly created container.
     */
    static SequenceContainer* createContainerOfSpecifiedSize(const Alphabet* alphabet, size_t size);

    /**
     * @brief Create a container with specified names.
     *
     * A new VectorSequenceContainer with the specified alphabet is created.
     * The destruction of this new container is up to the user.
     * Sequences have the specified names and no content and comments.
     *
     * @param alphabet The alphabet to use in the container.
     * @param seqNames The names of the sequences.
     * @return A pointer toward a newly created container.
     * @throw Exception If two sequence names are not unique.
     */
    static SequenceContainer* createContainerWithSequenceNames(
      const Alphabet* alphabet,
      const std::vector& seqNames)
      throw (Exception);

    /**
     * @brief Generic function which creates a new container from another one,
     * by specifying the class of sequence to be stored.
     *
     * Compared to several copy constructors, this function allows changing the
     * class of the inner sequence objects used for storing sequences.
     * The function uses the addSequence method, so that it can also be used to
     * concatenate containers.
     *
     * NOTE(review): the template parameter list is not visible in this copy of the
     * header; the body uses the names ContFrom, ContTo and Seq.
     *
     * @param input The container to copy.
     * @param output The container where new sequences will be appended.
     */
    template
    static void convertContainer(const ContFrom& input, ContTo& output)
    {
      for (size_t i = 0; i < input.getNumberOfSequences(); ++i)
      {
        // The converted sequence is held by an auto_ptr, so it is released at the
        // end of each iteration; addSequence() only receives a reference to it
        // (presumably it stores its own copy -- TODO confirm).
        std::auto_ptr seq(new Seq(input.getSequence(i)));
        output.addSequence(*seq);
      }
    }

    /**
     * @brief Add a specified set of sequences from a container to another.
     *
     * Sequences are specified by their position, beginning at 0.
     * Name verification will be performed, only if the output container is not empty,
     * based on the assumption that the container passed as argument is a correct one.
     * Redundant selection is not checked, so be careful with what you're doing!
     *
     * @author Julien Dutheil
     *
     * @param sequences The container from which sequences are to be taken.
     * @param selection The positions of all sequences to retrieve.
     * @param outputCont A container where the selection should be added.
     * @throw Exception In case of bad sequence name, alphabet mismatch, etc.
     */
    static void getSelectedSequences(const OrderedSequenceContainer& sequences, const SequenceSelection& selection, SequenceContainer& outputCont) throw (Exception);

    /**
     * @brief Add a specified set of sequences from a container to another.
     *
     * Sequences are specified by their names.
     * Name verification will be performed, only if the output container is not empty,
     * based on the assumption that the container passed as argument is a correct one.
     * Redundant selection is not checked, so be careful with what you're doing!
     *
     * @author Julien Dutheil
     *
     * @param sequences The container from which sequences are to be taken.
     * @param selection The names of all sequences to retrieve.
     * @param outputCont A container where the selection should be added.
     * @param strict If yes, trying to select a sequence that is not present
     * will raise an exception. If no, only available sequences will be added.
     * @throw Exception In case of bad sequence name, alphabet mismatch, etc.
     */
    static void getSelectedSequences(const SequenceContainer& sequences, const std::vector& selection, SequenceContainer& outputCont, bool strict = true) throw (Exception);

    /**
     * @brief Remove all sequences that are not in a given selection from a given container.
     *
     * Sequences are specified by their position, beginning at 0.
     * Redundant selection is not checked, so be careful with what you're doing!
     *
     * NOTE(review): earlier documentation stated that a new VectorSequenceContainer
     * is created and returned, but the function returns void and takes the container
     * by non-const reference, so the selection is presumably applied in place --
     * TODO confirm against the implementation.
     *
     * @param sequences The container from which sequences are to be taken.
     * @param selection The positions of all sequences to keep.
     */
    static void keepOnlySelectedSequences(OrderedSequenceContainer& sequences, const SequenceSelection& selection);

    /**
     * @brief Check if all sequences in a SequenceContainer have the same length.
     *
     * @param sequences The container to check.
     * @return True if all sequences have the same length.
     */
    static bool sequencesHaveTheSameLength(const SequenceContainer& sequences);

    /**
     * @brief Compute base counts
     *
     * Example of usage: getting the GC count from a sequence container.
     *
     * map counts;
     * SequenceContainerTools::getCounts(myContainer, counts); //My container is previously defined.
     * int GCcontent = counts[1] + counts[2] ;
     *
     *
     * States are stored as their int code.
     */
    static void getCounts(const SequenceContainer& sequences, std::map&);

    /**
     * @brief Compute base frequencies.
     *
     * Example of usage: getting the GC content from a sequence container.
     *
     * map freqs;
     * SequenceContainerTools::getFrequencies(myContainer, freqs); //My container is previously defined.
     * double GCcontent = (freqs[1] + freqs[2]) / (freqs[0] + freqs[1] + freqs[2] + freqs[3]);
     *
     *
     * States are stored as their int code.
     *
     * NOTE(review): the pseudoCount argument (default 0) is not described here;
     * presumably it is added to the counts before normalization -- TODO confirm.
     */
    static void getFrequencies(const SequenceContainer& sequences, std::map& f, double pseudoCount = 0);

    /**
     * @brief Append all the sequences of a SequenceContainer to the end of another.
     *
     * Sequences are retrieved from seqCont2 by name, in the order given by
     * getSequencesNames().
     *
     * @param seqCont1 The SequenceContainer in which the sequences will be added.
     * @param seqCont2 The SequenceContainer from which the sequences are taken.
     * @param checkNames Tell if the sequence names should be checked for uniqueness.
     */
    static void append(SequenceContainer& seqCont1, const SequenceContainer& seqCont2, bool checkNames = true)
    throw (Exception)
    {
      std::vector seqNames = seqCont2.getSequencesNames();
      for (size_t i = 0; i < seqNames.size(); i++)
        seqCont1.addSequence(seqCont2.getSequence(seqNames[i]), checkNames);
    }

    /**
     * @brief Append all the sequences of a SequenceContainer to the end of another, OrderedSequenceContainer implementation.
     *
     * Sequences are retrieved from seqCont2 by position instead of by name.
     *
     * @param seqCont1 The SequenceContainer in which the sequences will be added.
     * @param seqCont2 The SequenceContainer from which the sequences are taken.
     * @param checkNames Tell if the sequence names should be checked for uniqueness.
     */
    static void append(SequenceContainer& seqCont1, const OrderedSequenceContainer& seqCont2, bool checkNames=true)
    throw (Exception)
    {
      for (size_t i = 0; i < seqCont2.getNumberOfSequences(); i++)
        seqCont1.addSequence(seqCont2.getSequence(i), checkNames);
    }

    /**
     * @brief Concatenate the sequences from two containers.
     *
     * This method will not check the original sequence names for uniqueness. If sequences do not have a unique name,
     * then the resulting merged container will contain the first sequence with the given duplicated name.
     *
     * @author Julien Dutheil
     *
     * @param seqCont1 First container.
     * @param seqCont2 Second container. This container must contain sequences with the same names as in seqCont1.
     * Additional sequences will be ignored.
     * @param outputCont Output sequence container to which concatenated sequences will be added.
     * @throw AlphabetMismatchException If the alphabet types of seqCont1 and seqCont2 differ
     * (this is the only check made here; a mismatch with outputCont is presumably
     * reported by addSequence -- TODO confirm).
     */
    static void merge(const SequenceContainer& seqCont1, const SequenceContainer& seqCont2, SequenceContainer& outputCont)
    throw (Exception)
    {
      if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType())
        throw AlphabetMismatchException("SequenceContainerTools::merge.", seqCont1.getAlphabet(), seqCont2.getAlphabet());

      // The names of the first container drive the merge: each of its sequences
      // is copied, extended with the content of the same-named sequence of the
      // second container, and added to the output without name checking.
      std::vector seqNames = seqCont1.getSequencesNames();
      for (size_t i = 0; i < seqNames.size(); i++)
      {
        BasicSequence tmp = seqCont1.getSequence(seqNames[i]);
        tmp.append(seqCont2.getContent(seqNames[i]));
        outputCont.addSequence(tmp, false);
      }
    }

    /**
     * @brief Convert a SequenceContainer with a new alphabet.
     *
     * This method assumes that the original container has proper sequence names.
     * Names will be checked only if the output container is not empty.
     * Each sequence is rebuilt from its string representation using the alphabet
     * of the output container.
     *
     * @param seqCont The container to convert.
     * @param outputCont A container (most likely empty) with an alphabet into which the container will be converted.
     */
    static void convertAlphabet(const SequenceContainer& seqCont, SequenceContainer& outputCont)
    throw (Exception)
    {
      std::vector seqNames = seqCont.getSequencesNames();
      // Decided once, before anything is added by this call.
      bool checkNames = outputCont.getNumberOfSequences() > 0;
      for (size_t i = 0; i < seqNames.size(); i++)
      {
        BasicSequence seq(seqNames[i], seqCont.toString(seqNames[i]), outputCont.getAlphabet());
        outputCont.addSequence(seq, checkNames);
      }
    }

    /**
     * @brief Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting nucleotide container.
     *
     * @param sequences The input sequence container, with codon alphabet.
     * @param pos The codon position to retrieve.
     * @return A SequenceContainer with a nucleotide alphabet.
     * @throw AlphabetException If input sequences are not registered with a codon alphabet.
     */
    static SequenceContainer* getCodonPosition(const SequenceContainer& sequences, size_t pos) throw (AlphabetException);

};

} //end of namespace bpp.

#endif //_SEQUENCECONTAINERTOOLS_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainerIterator.cpp000644 000000 000000 00000005225 12147656566 024700 0ustar00rootroot000000 000000
//
// File: SequenceContainerIterator.cpp
// Created by: Julien Dutheil
// Created on: Tue Feb 26 14:44 2013
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "SequenceContainerIterator.h" using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ AbstractSequenceContainerIterator::AbstractSequenceContainerIterator(const OrderedSequenceContainer& sequences) : sequences_(&sequences), currentPosition_(0) {} /******************************************************************************/ SimpleSequenceContainerIterator::SimpleSequenceContainerIterator(const OrderedSequenceContainer& sequences): AbstractSequenceContainerIterator(sequences) {} const Sequence* SimpleSequenceContainerIterator::nextSequence() { const Sequence* s = &sequences_->getSequence(currentPosition_); currentPosition_++; return s; } bool SimpleSequenceContainerIterator::hasMoreSequences() const { return currentPosition_ < static_cast(sequences_->getNumberOfSequences()); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/CompressedVectorSiteContainer.cpp000644 000000 000000 00000045262 12147656566 025537 0ustar00rootroot000000 000000 // // File: CompressedCompressedVectorSiteContainer.cpp // Created by: Julien Dutheil // Created on: Wed Dec 16 12:08 2009 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "CompressedVectorSiteContainer.h" #include #include using namespace std; using namespace bpp; /** Class constructors: *******************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer( const std::vector& vs, const Alphabet* alpha) throw (Exception) : AbstractSequenceContainer(alpha), sites_(0), index_(0), names_(0), comments_(0), sequences_(0) { if (vs.size() == 0) throw Exception("CompressedVectorSiteContainer::CompressedVectorSiteContainer. 
Empty site set."); // Seq names and comments: size_t nbSeq = vs[0]->size(); names_.resize(nbSeq); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { names_[i] = "Seq_" + TextTools::toString(i); comments_[i] = new Comments(); } // Now try to add each site: for (size_t i = 0; i < vs.size(); i++) { addSite(*vs[i]); // This may throw an exception if position argument already exists or is size is not valid. } sequences_.resize(nbSeq); } /******************************************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer(size_t size, const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), index_(0), names_(size), comments_(size), sequences_(size) { // Seq names and comments: for (size_t i = 0; i < size; i++) { names_[i] = "Seq_" + i; comments_[i] = new Comments(); } } /******************************************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer(const std::vector& names, const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), index_(0), names_(names.size()), comments_(names.size()), sequences_(names.size()) { // Seq names and comments: for (size_t i = 0; i < names.size(); i++) { names_[i] = names[i]; comments_[i] = new Comments(); } } /******************************************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer(const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), index_(0), names_(0), comments_(0), sequences_(0) {} /******************************************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer(const CompressedVectorSiteContainer& vsc) : AbstractSequenceContainer(vsc), sites_(vsc.sites_.size()), index_(vsc.index_), names_(vsc.names_), comments_(vsc.getNumberOfSequences()), sequences_(vsc.getNumberOfSequences()) { // Now try to add each site: sites_.resize(vsc.sites_.size()); 
for (size_t i = 0; i < vsc.sites_.size(); i++) { sites_[i] = dynamic_cast(vsc.sites_[i]->clone()); } for (size_t i = 0; i < vsc.getNumberOfSites(); i++) { addSite(vsc.getSite(i), false); // We assume that positions are correct. } // Seq comments: for (size_t i = 0; i < vsc.getNumberOfSequences(); i++) { comments_[i] = new Comments(vsc.getComments(i)); } } /******************************************************************************/ CompressedVectorSiteContainer::CompressedVectorSiteContainer(const SiteContainer& sc) : AbstractSequenceContainer(sc.getAlphabet()), sites_(0), index_(0), names_(sc.getSequencesNames()), comments_(sc.getNumberOfSequences()), sequences_(sc.getNumberOfSequences()) { // Now try to add each site: for (size_t i = 0; i < sc.getNumberOfSites(); i++) { addSite(sc.getSite(i), false); // We assume that positions are correct. } // Seq comments: for (size_t i = 0; i < sc.getNumberOfSequences(); i++) { comments_[i] = new Comments(sc.getComments(i)); } } /******************************************************************************/ CompressedVectorSiteContainer& CompressedVectorSiteContainer::operator=(const CompressedVectorSiteContainer& vsc) { AbstractSequenceContainer::operator=(vsc); // Seq names: names_ = vsc.names_; // Now try to add each site: sites_.resize(vsc.sites_.size()); for (size_t i = 0; i < vsc.sites_.size(); i++) { sites_[i] = dynamic_cast(vsc.sites_[i]->clone()); } index_ = vsc.index_; // Seq comments: size_t nbSeq = vsc.getNumberOfSequences(); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { comments_[i] = new Comments(vsc.getComments(i)); } sequences_.resize(nbSeq); return *this; } /******************************************************************************/ CompressedVectorSiteContainer& CompressedVectorSiteContainer::operator=(const SiteContainer& sc) { clear(); AbstractSequenceContainer::operator=(sc); // Seq names: names_ = sc.getSequencesNames(); // Now try to add each site: for (size_t i = 0; i < 
sc.getNumberOfSites(); i++) { addSite(sc.getSite(i), false); // We assume that positions are correct. } // Seq comments: size_t nbSeq = sc.getNumberOfSequences(); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { comments_[i] = new Comments(sc.getComments(i)); } sequences_.resize(nbSeq); return *this; } /******************************************************************************/ const Site& CompressedVectorSiteContainer::getSite(size_t i) const throw (IndexOutOfBoundsException) { if (i >= getNumberOfSites()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::getSite.", i, 0, getNumberOfSites() - 1); return *sites_[index_[i]]; } /******************************************************************************/ void CompressedVectorSiteContainer::setSite(size_t pos, const Site& site, bool checkPositions) throw (Exception) { if (pos >= getNumberOfSites()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::setSite.", pos, 0, getNumberOfSites() - 1); // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("CompressedVectorSiteContainer::setSite", getAlphabet(), site.getAlphabet()); size_t current = index_[pos]; size_t siteIndex = getSiteIndex_(site); if (siteIndex == current) { //Nothing to do here, this is the same site. } else if (siteIndex < sites_.size()) { //The new site is already in the list, si we just update the index: index_[pos] = siteIndex; //We have to check if the previous pattern was unique, and if so, remove it and update indices: bool test = true; for (size_t i = 0; test && i < index_.size(); ++i) { if (index_[i] == current) { //There is another site, so nothing to do... 
test = false; } } if (test) { //There was no other site pointing toward this pattern, so we remove it. delete sites_[current]; sites_.erase(sites_.begin() + current); //Now we have to correct all indices: for (size_t i = 0; i < index_.size(); ++i) { if (index_[i] > current) index_[i]--; } } } else { //This is a new pattern, and we have to add it to the list... Site* copy = dynamic_cast(site.clone()); //Now we have to check if the previous pattern was unique, and if so, //replace it with the new one. Otherwise, add the new site at the end of the list. bool test = true; for (size_t i = 0; test && i < index_.size(); ++i) { if (i != pos && index_[i] == current) { //There is another site, so nothing to do... test = false; } } if (test) { //There was no other site pointing toward this pattern, so we remove it. delete sites_[current]; sites_[current] = copy; } else { //We add the site at the end: sites_.push_back(copy); index_[pos] = siteIndex; } } } /******************************************************************************/ Site* CompressedVectorSiteContainer::removeSite(size_t i) throw (IndexOutOfBoundsException) { if (i >= getNumberOfSites()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::removeSite.", i, 0, getNumberOfSites() - 1); //Here we return a copy of the site, as it will not necessarily be removed from the set, so we don't want to delete it. 
Site* site = dynamic_cast(sites_[index_[i]]->clone()); deleteSite(i); return site; } /******************************************************************************/ void CompressedVectorSiteContainer::deleteSite(size_t siteIndex) throw (IndexOutOfBoundsException) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::deleteSite.", siteIndex, 0, getNumberOfSites() - 1); //Here we need to check whether the pattern corresponding to this site is unique: size_t current = index_[siteIndex]; bool test = true; for (size_t j = 0; test && j < index_.size(); ++j) { if (j != siteIndex && index_[j] == current) { //There is a nother site, so nothing to... test = false; } } if (test) { //There was no other site pointing toward this pattern, so we remove it. delete sites_[current]; sites_.erase(sites_.begin() + current); //Now we have to correct all indices: for (size_t j = 0; j < index_.size(); ++j) { if (index_[j] > current) index_[j]--; } } index_.erase(index_.begin() + siteIndex); } /******************************************************************************/ void CompressedVectorSiteContainer::deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException) { //This may be optimized later: for (size_t i = 0; i < length; ++i) { deleteSite(siteIndex + i); } } /******************************************************************************/ void CompressedVectorSiteContainer::addSite(const Site& site, bool checkPositions) throw (Exception) { // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("CompressedVectorSiteContainer::addSite. 
Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("CompressedVectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } size_t siteIndex = getSiteIndex_(site); if (siteIndex == sites_.size()) { //This is a new pattern: Site* copy = dynamic_cast(site.clone()); sites_.push_back(copy); } index_.push_back(siteIndex); } /******************************************************************************/ void CompressedVectorSiteContainer::addSite(const Site& site, size_t siteIndex, bool checkPositions) throw (Exception) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::addSite", siteIndex, 0, getNumberOfSites() - 1); // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("CompressedVectorSiteContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("CompressedVectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } size_t index = getSiteIndex_(site); if (index == sites_.size()) { //This is a new pattern: Site* copy = dynamic_cast(site.clone()); sites_.push_back(copy); } index_.insert(index_.begin() + siteIndex, index); } /******************************************************************************/ void CompressedVectorSiteContainer::reindexSites() { int pos = 1; // first position is 1. 
for (vector::iterator i = sites_.begin(); i < sites_.end(); i++) { (*i)->setPosition(pos++); } } /******************************************************************************/ Vint CompressedVectorSiteContainer::getSitePositions() const { size_t n = getNumberOfSites(); Vint positions(n); for (size_t i = 0; i < n; i++) { positions[i] = sites_[index_[i]]->getPosition(); } return positions; } /******************************************************************************/ const Sequence& CompressedVectorSiteContainer::getSequence(size_t i) const throw (IndexOutOfBoundsException) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::getSequence.", i, 0, getNumberOfSequences() - 1); // Main loop : for all sites size_t n = getNumberOfSites(); vector sequence(n); for (size_t j = 0; j < n; j++) { sequence[j] = sites_[index_[j]]->getContent()[i]; } if (sequences_[i]) delete sequences_[i]; sequences_[i] = new BasicSequence(names_[i], sequence, *comments_[i], getAlphabet()); return *sequences_[i]; } /******************************************************************************/ const Sequence& CompressedVectorSiteContainer::getSequence(const std::string& name) const throw (SequenceNotFoundException) { // Look for sequence name: size_t pos = getSequencePosition(name); return getSequence(pos); } /******************************************************************************/ bool CompressedVectorSiteContainer::hasSequence(const string& name) const { //Look for sequence name: for (size_t pos = 0; pos < names_.size(); pos++) { if (names_[pos] == name) return true; } return false; } /******************************************************************************/ size_t CompressedVectorSiteContainer::getSequencePosition(const std::string& name) const throw (SequenceNotFoundException) { // Look for sequence name: for (size_t pos = 0; pos < names_.size(); pos++) { if (names_[pos] == name) return pos; } throw 
SequenceNotFoundException("CompressedVectorSiteContainer::getSequencePosition().", name); } /******************************************************************************/ void CompressedVectorSiteContainer::clear() { // Must delete all sites in the container: for (size_t i = 0; i < sites_.size(); i++) { delete sites_[i]; } // must delete all comments too: for (size_t i = 0; i < comments_.size(); i++) { if (comments_[i]) delete comments_[i]; } // Delete all sequences retrieved: for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]) delete (sequences_[i]); } // Delete all sites pointers sites_.clear(); index_.clear(); names_.clear(); comments_.clear(); sequences_.clear(); } /******************************************************************************/ vector CompressedVectorSiteContainer::getSequencesNames() const { vector seqnames(names_.size()); for (size_t i = 0; i < names_.size(); i++) { seqnames[i] = names_[i]; } return seqnames; } /******************************************************************************/ void CompressedVectorSiteContainer::setSequencesNames( const vector& names, bool checkNames) throw (Exception) { if (names.size() != getNumberOfSequences()) throw IndexOutOfBoundsException("CompressedVectorSiteContainer::setSequenceNames: bad number of names.", names.size(), getNumberOfSequences(), getNumberOfSequences()); if (checkNames) { for (size_t i = 0; i < names.size(); i++) { // For all names in vector : throw exception if name already exists for (size_t j = 0; j < i; j++) { if (names[j] == names[i]) throw Exception("CompressedVectorSiteContainer::setSequencesNames : Sequence's name already exists in container"); } } } for (size_t i = 0; i < names.size(); i++) { names_[i] = names[i]; } } /******************************************************************************/ void CompressedVectorSiteContainer::setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException) { comments_[sequenceIndex] = new 
Comments(comments); } /******************************************************************************/ CompressedVectorSiteContainer* CompressedVectorSiteContainer::createEmptyContainer() const { CompressedVectorSiteContainer* vsc = new CompressedVectorSiteContainer(getAlphabet()); vsc->setGeneralComments(getGeneralComments()); return vsc; } /******************************************************************************/ size_t CompressedVectorSiteContainer::getSiteIndex_(const Site& site) { size_t pos = sites_.size(); bool test; for (size_t i = 0; i < sites_.size(); ++i) { test = true; for (size_t j = 0; test && j < site.size(); ++j) //site is supposed to have the correct size, that is the same as all the ones in the container. { if (site[j] != (*sites_[i])[j]) test = false; } if (test) { pos = i; break; } } return pos; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/OrderedSequenceContainer.h000644 000000 000000 00000022734 12147656566 024144 0ustar00rootroot000000 000000 // // File OrderedSequenceContainer.h // Author: Guillaume Deuchst // Julien Dutheil // Last modification : Tuesday August 7 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ORDEREDSEQUENCECONTAINER_H_ #define _ORDEREDSEQUENCECONTAINER_H_ #include "../Sequence.h" #include "SequenceContainer.h" namespace bpp { /** * @brief The OrderedSequenceContainer interface. * * Interface to manage indexed containers. * Sequences may be accessed via their indice, i.e. their position in the container. */ class OrderedSequenceContainer: public virtual SequenceContainer { public: OrderedSequenceContainer() {} virtual ~OrderedSequenceContainer() {} public: /** * @brief Get the content of a sequence. * * @param sequenceIndex The position of the sequence. * @return The content of the sequence as a vector of integers. * @throw IndexOutOfBoundsException If the position does not match any sequence in the container. */ virtual const std::vector& getContent(size_t sequenceIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Convert a particular sequence to a string. * * @param sequenceIndex The position of the sequence. * @return A string describing the content of the sequence. * @throw IndexOutOfBoundsException If the position does not match any sequence in the container. 
*/ virtual std::string toString(size_t sequenceIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Retrieve a sequence object from the container. * * @param sequenceIndex The position of the sequence. * @return A reference toward the Sequence object with corresponding name. * @throw IndexOutOfBoundsException If the position does not match any sequence in the container. */ virtual const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Replace a sequence in the container. * * @param sequenceIndex The position of the sequence. * @param sequence The sequence to add. * @param checkName Tell if the container must check if the name of the sequence * is already used in the container before adding it. * @throw IndexOutOfBoundsException If the position does not match any sequence in the container. * @throw Exception Any other kind of exception. */ virtual void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName) throw (Exception) = 0; /** * @brief Extract (and remove) a sequence from the container. * * @param sequenceIndex The position of the sequence. * @throw IndexOutOfBoundsException If the name does not match any sequence in * the container. */ virtual Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException, Exception) = 0; /** * @brief Delete a sequence of the container. * * @param sequenceIndex The position of the sequence. * @throw IndexOutOfBoundsException If the position does not match any sequence in * the container. */ virtual void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException, Exception) = 0; /** * @brief Get the name of a particular sequence. * * @param sequenceIndex The position of the sequence. * @return The name of the sequence at position 'sequenceIndex'. * @throw IndexOutOfBoundsException If the position does not match any sequence in * the container. 
*/ virtual const std::string& getName(size_t sequenceIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Get comments of a particular sequence. * * @param sequenceIndex The position of the sequence. * @return The comments associated to sequence at position 'sequenceIndex'. * @throw IndexOutOfBoundsException If the position does not match any sequence in * the container. */ virtual const Comments& getComments(size_t sequenceIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Set the comments of a particular sequence. * * @param sequenceIndex The position of the sequence. * @param comments The comments to set to sequence with position 'i'. * @throw IndexOutOfBoundsException If the position does not match any sequence in * the container. */ virtual void setComments(size_t sequenceIndex, const Comments & comments) throw (IndexOutOfBoundsException) = 0; /** * @brief Get the position of a sequence in sequence container from its name. * * @param name The name of the sequence. * @return The position of the sequence with name 'name', if it exists. * @throw SequenceNotFoundException If no sequence with name 'name' could be found. */ virtual size_t getSequencePosition(const std::string & name) const throw (SequenceNotFoundException) = 0; /** * @name Provide direct access to sequences content. * * @warning These operators allow you to modifiy the content of the sequences. * No checking is performed for your modifications, so use with care, or * consider using the setContent() methods. * * @{ */ /** * @brief Element access operator. * * Allows direct access to the data stored in the container. * * @param sequenceIndex The sequence position. * @param elementIndex The element position within the sequence. * @throw IndexOutOfBoundsException If a position is not valid. */ virtual int& valueAt(size_t sequenceIndex, size_t elementIndex) throw (IndexOutOfBoundsException) = 0; /** * @brief Element access operator. 
* * Allows direct access to the data stored in the container. * * @param sequenceIndex The sequence position. * @param elementIndex The element position within the sequence. * @throw IndexOutOfBoundsException If a position is not valid. */ virtual const int& valueAt(size_t sequenceIndex, size_t elementIndex) const throw (IndexOutOfBoundsException) = 0; /** * @brief Element access operator. * * Allows direct access to the data stored in the container. * This method is faster then the valueAt function, but input * parameters are not checked! * * @param sequenceIndex The sequence position. * @param elementIndex The element position within the sequence. */ virtual int& operator()(size_t sequenceIndex, size_t elementIndex) = 0; /** * @brief Element access operator. * * Allows direct access to the data stored in the container. * This method is faster then the valueAt function, but input * parameters are not checked! * * @param sequenceIndex The sequence position. * @param elementIndex The element position within the sequence. */ virtual const int& operator()(size_t sequenceIndex, size_t elementIndex) const = 0; /** @} */ public: /** * @name SequenceContainer methods. 
* * @{ */ virtual const std::vector& getContent(const std::string& name) const throw (SequenceNotFoundException) = 0; virtual std::string toString(const std::string& name) const throw (SequenceNotFoundException) = 0; virtual const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException) = 0; virtual void setSequence(const std::string& name, const Sequence& sequence, bool checkName) throw (Exception) = 0; virtual Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException, Exception) = 0; virtual void deleteSequence(const std::string& name) throw (SequenceNotFoundException, Exception) = 0; virtual size_t getNumberOfSequences() const = 0; virtual std::vector getSequencesNames() const = 0; virtual void setSequencesNames(const std::vector & names, bool checkNames) throw (Exception) = 0; virtual const Comments& getComments(const std::string& name) const throw (SequenceNotFoundException) = 0; virtual void setComments(const std::string& name, const Comments& comments) throw (SequenceNotFoundException) = 0; /** @} */ }; } //end of namespace bpp. #endif //_ORDEREDSEQUENCECONTAINER_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/VectorSiteContainer.cpp000644 000000 000000 00000065763 12147656566 023522 0ustar00rootroot000000 000000 // // File: VectorSiteContainer.cpp // Created by: Julien Dutheil // Created on: Mon Oct 6 11:50:40 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "VectorSiteContainer.h" #include using namespace std; #include using namespace bpp; /** Class constructors: *******************************************************/ VectorSiteContainer::VectorSiteContainer( const std::vector& vs, const Alphabet* alpha, bool checkPositions) throw (Exception) : AbstractSequenceContainer(alpha), sites_(0), names_(0), comments_(0), sequences_(0) { if (vs.size() == 0) throw Exception("VectorSiteContainer::VectorSiteContainer. Empty site set."); // Seq names and comments: size_t nbSeq = vs[0]->size(); names_.resize(nbSeq); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { names_[i] = "Seq_" + TextTools::toString(i); comments_[i] = new Comments(); } // Now try to add each site: for (size_t i = 0; i < vs.size(); i++) { addSite(*vs[i], checkPositions); // This may throw an exception if position argument already exists or is size is not valid. 
} sequences_.resize(nbSeq); } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(size_t size, const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), names_(size), comments_(size), sequences_(size) { // Seq names and comments: for (size_t i = 0; i < size; i++) { names_[i] = string("Seq_") + TextTools::toString(i); comments_[i] = new Comments(); } } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const std::vector& names, const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), names_(names.size()), comments_(names.size()), sequences_(names.size()) { // Seq names and comments: for (size_t i = 0; i < names.size(); i++) { names_[i] = names[i]; comments_[i] = new Comments(); } } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const Alphabet* alpha) : AbstractSequenceContainer(alpha), sites_(0), names_(0), comments_(0), sequences_(0) {} /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const VectorSiteContainer& vsc) : AbstractSequenceContainer(vsc), sites_(0), names_(vsc.names_), comments_(vsc.getNumberOfSequences()), sequences_(vsc.getNumberOfSequences()) { // Now try to add each site: for (size_t i = 0; i < vsc.getNumberOfSites(); i++) { addSite(vsc.getSite(i), false); // We assume that positions are correct. 
} // Seq comments: for (size_t i = 0; i < vsc.getNumberOfSequences(); i++) { comments_[i] = new Comments(vsc.getComments(i)); } } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const SiteContainer& sc) : AbstractSequenceContainer(sc), sites_(0), names_(sc.getSequencesNames()), comments_(sc.getNumberOfSequences()), sequences_(sc.getNumberOfSequences()) { // Now try to add each site: for (size_t i = 0; i < sc.getNumberOfSites(); i++) { addSite(sc.getSite(i), false); // We assume that positions are correct. } // Seq comments: for (size_t i = 0; i < sc.getNumberOfSequences(); i++) { comments_[i] = new Comments(sc.getComments(i)); } } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const OrderedSequenceContainer& osc) : AbstractSequenceContainer(osc), sites_(0), names_(0), comments_(0), sequences_(0) { for (size_t i = 0; i < osc.getNumberOfSequences(); i++) { addSequence(osc.getSequence(i), false); } reindexSites(); } /******************************************************************************/ VectorSiteContainer::VectorSiteContainer(const SequenceContainer& sc) : AbstractSequenceContainer(sc), sites_(0), names_(0), comments_(0), sequences_(0) { vector names = sc.getSequencesNames(); for (size_t i = 0; i < names.size(); i++) { addSequence(sc.getSequence(names[i]), false); } reindexSites(); } /******************************************************************************/ VectorSiteContainer& VectorSiteContainer::operator=(const VectorSiteContainer& vsc) { clear(); AbstractSequenceContainer::operator=(vsc); // Seq names: names_.resize(vsc.getNumberOfSequences()); setSequencesNames(vsc.getSequencesNames(), true); // Now try to add each site: for (size_t i = 0; i < vsc.getNumberOfSites(); i++) { addSite(vsc.getSite(i), false); // We assume that positions are correct. 
} // Seq comments: size_t nbSeq = vsc.getNumberOfSequences(); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { comments_[i] = new Comments(vsc.getComments(i)); } sequences_.resize(nbSeq); return *this; } /******************************************************************************/ VectorSiteContainer& VectorSiteContainer::operator=(const SiteContainer& sc) { clear(); AbstractSequenceContainer::operator=(sc); // Seq names: names_.resize(sc.getNumberOfSequences()); setSequencesNames(sc.getSequencesNames(), true); // Now try to add each site: for (size_t i = 0; i < sc.getNumberOfSites(); i++) { addSite(sc.getSite(i), false); // We assume that positions are correct. } // Seq comments: size_t nbSeq = sc.getNumberOfSequences(); comments_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { comments_[i] = new Comments(sc.getComments(i)); } sequences_.resize(nbSeq); return *this; } /******************************************************************************/ VectorSiteContainer& VectorSiteContainer::operator=(const OrderedSequenceContainer& osc) { clear(); AbstractSequenceContainer::operator=(osc); size_t nbSeq = osc.getNumberOfSequences(); for (size_t i = 0; i < nbSeq; i++) { addSequence(osc.getSequence(i), false); } reindexSites(); return *this; } /******************************************************************************/ VectorSiteContainer& VectorSiteContainer::operator=(const SequenceContainer& sc) { clear(); AbstractSequenceContainer::operator=(sc); vector names = sc.getSequencesNames(); for (size_t i = 0; i < names.size(); i++) { addSequence(sc.getSequence(names[i]), false); } reindexSites(); return *this; } /******************************************************************************/ const Site& VectorSiteContainer::getSite(size_t i) const throw (IndexOutOfBoundsException) { if (i >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::getSite.", i, 0, getNumberOfSites() - 1); return *sites_[i]; } 
/******************************************************************************/ void VectorSiteContainer::setSite(size_t pos, const Site& site, bool checkPositions) throw (Exception) { if (pos >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::setSite.", pos, 0, getNumberOfSites() - 1); // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("AlignedSequenceContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("VectorSiteContainer::setSite", getAlphabet(), site.getAlphabet()); // Check position: if (checkPositions) { int position = site.getPosition(); // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < sites_.size(); i++) { if (sites_[i]->getPosition() == position) throw SiteException("VectorSiteContainer::setSite: Site position already exists in container", &site); } } delete sites_[pos]; sites_[pos] = dynamic_cast(site.clone()); } /******************************************************************************/ Site* VectorSiteContainer::removeSite(size_t i) throw (IndexOutOfBoundsException) { if (i >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::removeSite.", i, 0, getNumberOfSites() - 1); Site* site = sites_[i]; sites_.erase(sites_.begin() + i); return site; } /******************************************************************************/ void VectorSiteContainer::deleteSite(size_t i) throw (IndexOutOfBoundsException) { if (i >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::deleteSite.", i, 0, getNumberOfSites() - 1); delete sites_[i]; sites_.erase(sites_.begin() + i); } /******************************************************************************/ void VectorSiteContainer::deleteSites(size_t siteIndex, 
size_t length) throw (IndexOutOfBoundsException) { if (siteIndex + length > getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::deleteSites.", siteIndex + length, 0, getNumberOfSites() - 1); for (size_t i = siteIndex; i < siteIndex + length; ++i) { delete sites_[i]; } sites_.erase(sites_.begin() + siteIndex, sites_.begin() + siteIndex + length); } /******************************************************************************/ void VectorSiteContainer::addSite(const Site& site, bool checkPositions) throw (Exception) { // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("VectorSiteContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } // Check position: if (checkPositions) { int position = site.getPosition(); // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < sites_.size(); i++) { if (sites_[i]->getPosition() == position) throw SiteException("VectorSiteContainer::addSite. Site position already exists in container", &site); } } sites_.push_back(dynamic_cast(site.clone())); } /******************************************************************************/ void VectorSiteContainer::addSite(const Site& site, int position, bool checkPositions) throw (Exception) { // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("VectorSiteContainer::addSite. 
Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } // Check position: if (checkPositions) { // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < sites_.size(); i++) { if (sites_[i]->getPosition() == position) throw SiteException("VectorSiteContainer::addSite. Site position already exists in container", &site); } } Site* copy = dynamic_cast(site.clone()); copy->setPosition(position); sites_.push_back(copy); } /******************************************************************************/ void VectorSiteContainer::addSite(const Site& site, size_t siteIndex, bool checkPositions) throw (Exception) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::addSite", siteIndex, 0, getNumberOfSites() - 1); // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("VectorSiteContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } // Check position: if (checkPositions) { int position = site.getPosition(); // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < sites_.size(); i++) { if (sites_[i]->getPosition() == position) throw SiteException("VectorSiteContainer::addSite. 
Site position already exists in container", &site); } } // insert(begin() + pos, new Site(site)); sites_.insert(sites_.begin() + siteIndex, dynamic_cast(site.clone())); } /******************************************************************************/ void VectorSiteContainer::addSite(const Site& site, size_t siteIndex, int position, bool checkPositions) throw (Exception) { if (siteIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::addSite", siteIndex, 0, getNumberOfSites() - 1); // Check size: if (site.size() != getNumberOfSequences()) throw SiteException("VectorSiteContainer::addSite. Site does not have the appropriate length", &site); // New site's alphabet and site container's alphabet matching verification if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), site.getAlphabet()); } // Check position: if (checkPositions) { // For all positions in vector : throw exception if position already exists for (size_t i = 0; i < sites_.size(); i++) { if (sites_[i]->getPosition() == position) throw SiteException("VectorSiteContainer::addSite. Site position already exists in container", &site); } } Site* copy = dynamic_cast(site.clone()); copy->setPosition(position); sites_.insert(sites_.begin() + siteIndex, copy); } /******************************************************************************/ size_t VectorSiteContainer::getNumberOfSites() const { return sites_.size(); } /******************************************************************************/ void VectorSiteContainer::reindexSites() { int pos = 1; // first position is 1. 
for (vector::iterator i = sites_.begin(); i < sites_.end(); i++) { (*i)->setPosition(pos++); } } /******************************************************************************/ Vint VectorSiteContainer::getSitePositions() const { Vint positions(sites_.size()); for (size_t i = 0; i < sites_.size(); i++) { positions[i] = sites_[i]->getPosition(); } return positions; } /******************************************************************************/ const Sequence& VectorSiteContainer::getSequence(size_t i) const throw (IndexOutOfBoundsException) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::getSequence.", i, 0, getNumberOfSequences() - 1); // Main loop : for all sites size_t n = getNumberOfSites(); vector sequence(n); for (size_t j = 0; j < n; j++) { sequence[j] = sites_[j]->getContent()[i]; } if (sequences_[i]) delete sequences_[i]; sequences_[i] = new BasicSequence(names_[i], sequence, *comments_[i], getAlphabet()); return *sequences_[i]; } /******************************************************************************/ const Sequence& VectorSiteContainer::getSequence(const string& name) const throw (SequenceNotFoundException) { // Look for sequence name: size_t pos = getSequencePosition(name); return getSequence(pos); } /******************************************************************************/ bool VectorSiteContainer::hasSequence(const string& name) const { // Look for sequence name: for (size_t pos = 0; pos < names_.size(); pos++) { if (names_[pos] == name) return true; } return false; } /******************************************************************************/ size_t VectorSiteContainer::getSequencePosition(const string& name) const throw (SequenceNotFoundException) { // Look for sequence name: for (size_t pos = 0; pos < names_.size(); pos++) { if (names_[pos] == name) return pos; } throw SequenceNotFoundException("VectorSiteContainer::getSequencePosition().", name); } 
/******************************************************************************/ void VectorSiteContainer::setSequence(const string& name, const Sequence& sequence, bool checkNames) throw (Exception) { // Look for sequence name: size_t pos = getSequencePosition(name); setSequence(pos, sequence, checkNames); } /******************************************************************************/ void VectorSiteContainer::setSequence(size_t pos, const Sequence& sequence, bool checkNames) throw (Exception) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::setSequence", pos, 0, getNumberOfSequences() - 1); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), sequence.getAlphabet()); // If the container has only one sequence, we set the size to the size of this sequence: if (getNumberOfSequences() == 1) realloc(sequence.size()); if (sequence.size() != sites_.size()) throw SequenceException("VectorSiteContainer::setSequence. Sequence has not the appropriate length.", &sequence); if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (i != pos && sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::settSequence. 
Name already exists in container.", &sequence); } } // Update name: names_[pos] = sequence.getName(); // Update elements at each site: for (size_t i = 0; i < sites_.size(); i++) { sites_[i]->setElement(pos, sequence.getValue(i)); } // Update comments: if (comments_[pos]) delete comments_[pos]; comments_[pos] = new Comments(sequence.getComments()); // Update sequences: if (sequences_[pos]) delete sequences_[pos]; sequences_[pos] = 0; } /******************************************************************************/ Sequence* VectorSiteContainer::removeSequence(size_t i) throw (IndexOutOfBoundsException) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::removeSequence.", i, 0, getNumberOfSequences() - 1); getSequence(i); // Actuallizes pointer. Sequence* sequence = sequences_[i]; for (size_t j = 0; j < sites_.size(); j++) { // For each site: sites_[j]->deleteElement(i); } // Now actualize names and comments: names_.erase(names_.begin() + i); if (comments_[i]) delete comments_[i]; comments_.erase(comments_.begin() + i); // We remove the sequence, so the destruction of the sequence is up to the user: // if (sequences_[i] != 0) delete sequences_[i]; sequences_.erase(sequences_.begin() + i); return sequence; } /******************************************************************************/ Sequence* VectorSiteContainer::removeSequence(const string& name) throw (SequenceNotFoundException) { // Look for sequence name: size_t pos = getSequencePosition(name); return removeSequence(pos); } /******************************************************************************/ void VectorSiteContainer::deleteSequence(size_t i) throw (IndexOutOfBoundsException) { if (i >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::demeteSequence.", i, 0, getNumberOfSequences() - 1); for (size_t j = 0; j < sites_.size(); j++) { sites_[j]->deleteElement(i); } // Now actualize names and comments: names_.erase(names_.begin() + i); 
if (comments_[i]) delete comments_[i]; comments_.erase(comments_.begin() + i); if (sequences_[i]) delete sequences_[i]; sequences_.erase(sequences_.begin() + i); } /******************************************************************************/ void VectorSiteContainer::deleteSequence(const string& name) throw (SequenceNotFoundException) { // Look for sequence name: size_t pos = getSequencePosition(name); deleteSequence(pos); } /******************************************************************************/ void VectorSiteContainer::addSequence(const Sequence& sequence, bool checkNames) throw (Exception) { // If the container has no sequence, we set the size to the size of this sequence: if (getNumberOfSequences() == 0) realloc(sequence.size()); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet()); if (sequence.size() != sites_.size()) throw SequenceException("VectorSiteContainer::addSequence. Sequence has not the appropriate length: " + TextTools::toString(sequence.size()) + ", should be " + TextTools::toString(sites_.size()) + ".", &sequence); if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::addSequence. 
Name already exists in container.", &sequence); } } // Append name: names_.push_back(sequence.getName()); // Append elements at each site: for (size_t i = 0; i < sites_.size(); i++) { sites_[i]->addElement(sequence.getValue(i)); } // Append comments: comments_.push_back(new Comments(sequence.getComments())); // Sequences pointers: sequences_.push_back(0); } /******************************************************************************/ void VectorSiteContainer::addSequence( const Sequence& sequence, size_t pos, bool checkNames) throw (Exception) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::addSequence.", pos, 0, getNumberOfSequences() - 1); if (sequence.size() != sites_.size()) throw SequenceNotAlignedException("VectorSiteContainer::setSequence", &sequence); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), sequence.getAlphabet()); } if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::addSequence. 
Name already exists in container.", &sequence); } } for (size_t i = 0; i < sites_.size(); i++) { // For each site: sites_[i]->addElement(pos, sequence.getValue(i)); } // Actualize names and comments: names_.insert(names_.begin() + pos, sequence.getName()); comments_.insert(comments_.begin() + pos, new Comments(sequence.getComments())); sequences_.insert(sequences_.begin() + pos, 0); } /******************************************************************************/ void VectorSiteContainer::clear() { // Must delete all sites in the container: for (size_t i = 0; i < sites_.size(); i++) { delete sites_[i]; } // must delete all comments too: for (size_t i = 0; i < comments_.size(); i++) { if (comments_[i] != 0) delete comments_[i]; } // Delete all sequences retrieved: for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i] != 0) delete (sequences_[i]); } // Delete all sites pointers sites_.clear(); names_.clear(); comments_.clear(); sequences_.clear(); } /******************************************************************************/ void VectorSiteContainer::realloc(size_t n) { clear(); sites_.resize(n); for (size_t i = 0; i < n; i++) { sites_[i] = new Site(getAlphabet()); } reindexSites(); } /******************************************************************************/ vector VectorSiteContainer::getSequencesNames() const { return names_; } /******************************************************************************/ void VectorSiteContainer::setSequencesNames( const vector& names, bool checkNames) throw (Exception) { if (names.size() != getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::setSequenceNames: bad number of names.", names.size(), getNumberOfSequences(), getNumberOfSequences()); if (checkNames) { for (size_t i = 0; i < names.size(); i++) { // For all names in vector : throw exception if name already exists for (size_t j = 0; j < i; j++) { if (names[j] == names[i]) throw 
Exception("VectorSiteContainer::setSequencesNames : Sequence's name already exists in container"); } } } for (size_t i = 0; i < names.size(); i++) { names_[i] = names[i]; } } /******************************************************************************/ void VectorSiteContainer::setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException) { comments_[sequenceIndex] = new Comments(comments); } /******************************************************************************/ VectorSiteContainer* VectorSiteContainer::createEmptyContainer() const { VectorSiteContainer* vsc = new VectorSiteContainer(getAlphabet()); vsc->setGeneralComments(getGeneralComments()); return vsc; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/VectorSequenceContainer.h000644 000000 000000 00000023526 12147656566 024022 0ustar00rootroot000000 000000 // // File VectorSequenceContainer.h // Created by: Guillaume Deuchst // Julien Dutheil // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _VECTORSEQUENCECONTAINER_H_
#define _VECTORSEQUENCECONTAINER_H_

#include "../Alphabet/Alphabet.h"
#include "../Sequence.h"
#include "AbstractSequenceContainer.h"
// NOTE(review): the three directives below have no header name; the names
// appear to have been lost when the file was extracted. Restore them from the
// upstream sources before building.
#include

// From the STL:
#include
#include

namespace bpp
{

/**
 * @brief The VectorSequenceContainer class.
 *
 * This is the simplest implementation of the OrderedSequenceContainer interface.
 * Sequences are stored in a std::vector of pointers.
 * The container is responsible for the creation and destruction of the sequence
 * objects it contains.
 */
class VectorSequenceContainer:
  public AbstractSequenceContainer
{
  private:

    /**
     * @brief A std::vector of pointers toward the sequences stored in the container.
     *
     * NOTE(review): the template argument of std::vector is missing here and in
     * several signatures below (apparently lost in extraction) - confirm the
     * element types against the upstream sources.
     */
    mutable std::vector sequences_;

  public:

    /**
     * @brief Build a new container from a std::vector of pointers toward sequence objects.
     *
     * The addSequence() method is called upon each Sequence object, hence each sequence is
     * copied into the container.
     *
     * @param vs    The std::vector of pointers toward sequence objects.
     * @param alpha The alphabet common to all sequences.
     * @throw AlphabetMismatchException if one sequence does not match the specified alphabet.
     */
    VectorSequenceContainer(
      const std::vector& vs, const Alphabet* alpha)
    throw (AlphabetMismatchException);

    /**
     * @brief Build an empty container that will contain sequences of a particular alphabet.
     *
     * @param alpha The alphabet of the container.
     */
    VectorSequenceContainer(const Alphabet* alpha): AbstractSequenceContainer(alpha), sequences_() {}

    /**
     * @name Copy constructors:
     *
     * @{
     */

    /**
     * @brief Copy from a VectorSequenceContainer.
     *
     * @param vsc The VectorSequenceContainer to copy into this container.
     */
    VectorSequenceContainer(const VectorSequenceContainer& vsc);

    /**
     * @brief Copy from an OrderedSequenceContainer.
     *
     * @param osc The OrderedSequenceContainer to copy into this container.
     */
    VectorSequenceContainer(const OrderedSequenceContainer& osc);

    /**
     * @brief Copy from a SequenceContainer.
     *
     * @param osc The SequenceContainer to copy into this container.
     */
    VectorSequenceContainer(const SequenceContainer& osc);

    /** @} */

    /**
     * @brief Assign from a VectorSequenceContainer.
     *
     * @param vsc The VectorSequenceContainer to copy into this container.
     */
    VectorSequenceContainer& operator=(const VectorSequenceContainer& vsc);

    /**
     * @brief Assign from an OrderedSequenceContainer.
     *
     * @param osc The OrderedSequenceContainer to copy into this container.
     */
    VectorSequenceContainer& operator=(const OrderedSequenceContainer& osc);

    /**
     * @brief Assign from a SequenceContainer.
     *
     * @param osc The SequenceContainer to copy into this container.
     */
    VectorSequenceContainer& operator=(const SequenceContainer& osc);

    /**
     * @brief Container destructor: delete all sequences in the container.
     */
    virtual ~VectorSequenceContainer() { clear(); }

  public:

    /**
     * @name The Clonable interface.
     *
     * @{
     */
    // Returns a heap-allocated copy of this container (copy constructor).
    Clonable* clone() const { return new VectorSequenceContainer(*this); }
    /** @} */

    /**
     * @name The SequenceContainer interface.
     *
     * @{
     */
    bool hasSequence(const std::string& name) const;

    const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException);

    // The three name-based modifiers below resolve the name with
    // getSequencePosition() and forward to their index-based counterpart.
    void setSequence(const std::string& name, const Sequence& sequence, bool checkName = true) throw (Exception)
    {
      setSequence(getSequencePosition(name), sequence, checkName);
    }

    Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException)
    {
      return removeSequence(getSequencePosition(name));
    }

    void deleteSequence(const std::string& name) throw (SequenceNotFoundException)
    {
      deleteSequence(getSequencePosition(name));
    }

    // Number of sequences = number of stored pointers.
    size_t getNumberOfSequences() const { return sequences_.size(); }

    std::vector getSequencesNames() const;

    void setSequencesNames(const std::vector& names, bool checkNames = true) throw (Exception);

    void clear();

    VectorSequenceContainer * createEmptyContainer() const;

    // Element access. Every accessor first fetches the sequence (by name or by
    // index) and then indexes it with operator[]. The non-const versions go
    // through the protected getSequence_() to obtain a mutable sequence.
    int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException)
    {
      return getSequence_(sequenceName)[elementIndex];
    }

    const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException)
    {
      return getSequence(sequenceName)[elementIndex];
    }

    int& operator()(const std::string& sequenceName, size_t elementIndex)
    {
      return getSequence_(sequenceName)[elementIndex];
    }

    const int& operator()(const std::string& sequenceName, size_t elementIndex) const
    {
      return getSequence(sequenceName)[elementIndex];
    }

    int& valueAt(size_t sequenceIndex, size_t elementIndex) throw (IndexOutOfBoundsException)
    {
      return getSequence_(sequenceIndex)[elementIndex];
    }

    const int& valueAt(size_t sequenceIndex, size_t elementIndex) const throw (IndexOutOfBoundsException)
    {
      return getSequence(sequenceIndex)[elementIndex];
    }

    int& operator()(size_t sequenceIndex, size_t elementIndex)
    {
      return getSequence_(sequenceIndex)[elementIndex];
    }

    const int & operator()(size_t sequenceIndex, size_t elementIndex) const
    {
      return getSequence(sequenceIndex)[elementIndex];
    }
    /** @} */

    /**
     * @name The OrderedSequenceContainer interface.
     *
     * @{
     */
    // Name-based version: forwarded to the AbstractSequenceContainer implementation.
    void setComments(const std::string & name, const Comments& comments) throw (SequenceNotFoundException)
    {
      AbstractSequenceContainer::setComments(name, comments);
    }

    void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException);

    size_t getSequencePosition(const std::string& name) const throw (SequenceNotFoundException);

    const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException);

    void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName = true) throw (Exception);

    Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);

    void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
    /** @} */

    /**
     * @name Add sequence to this container.
     *
     * @{
     */

    /**
     * @brief Add a sequence at the end of the container.
     *
     * The sequence is copied into the container.
     * If checkName is set to true, the method checks if the name of the
     * sequence is already used in the container, and sends an exception if it
     * is the case. Otherwise, do not check the name: the method is hence faster,
     * but use it at your own risk!
     *
     * @param sequence  The sequence to add.
     * @param checkName Tell if the method must check the name of the sequence
     *                  before adding it.
     * @throw Exception If the sequence couldn't be added to the container.
     */
    virtual void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception);

    /**
     * @brief Add a sequence to the container at a particular position.
     *
     * The sequence is copied into the container.
     * If checkName is set to true, the method checks if the name of the
     * sequence is already used in the container, and sends an exception if it
     * is the case. Otherwise, do not check the name: the method is hence faster,
     * but use it at your own risk!
     *
     * @param sequence      The sequence to add.
     * @param sequenceIndex The position where to insert the new sequence.
     *                      All the following sequences will be pushed.
     * @param checkName     Tell if the method must check the name of the sequence
     *                      before adding it.
     * @throw Exception If the sequence couldn't be added to the container.
     */
    virtual void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception);

  protected:

    /**
     * @name AbstractSequenceContainer methods.
     *
     * Mutable access to a stored sequence, used by the non-const element
     * accessors above.
     *
     * @{
     */
    Sequence& getSequence_(size_t i) throw (IndexOutOfBoundsException);
    Sequence& getSequence_(const std::string& name) throw (SequenceNotFoundException);
    /** @} */
};

} //end of namespace bpp.

#endif // _VECTORSEQUENCECONTAINER_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/AbstractSequenceContainer.cpp000644 000000 000000 00000004254 12147656566 024653 0ustar00rootroot000000 000000 
//
// File: AbstractSequenceContainer.cpp
// Created by: Guillaume Deuchst
//             Julien Dutheil
// Created on: Wednesday July 30 2003
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "AbstractSequenceContainer.h" using namespace bpp; using namespace std; /**************************************************************************************************/ void AbstractSequenceContainer::setComments(const string& name, const Comments& comments) throw (SequenceNotFoundException) { size_t pos = getSequencePosition(name); setComments(pos, comments); } /**************************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/VectorSiteContainer.h000644 000000 000000 00000024015 12147656566 023150 0ustar00rootroot000000 000000 // // File: VectorSiteContainer.h // Created by: Julien Dutheil // Created on: Mon Oct 6 11:50:40 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _VECTORSITECONTAINER_H_
#define _VECTORSITECONTAINER_H_

#include "../Site.h"
#include "SiteContainer.h"
#include "AbstractSequenceContainer.h"
#include "AlignedSequenceContainer.h"
#include "OrderedSequenceContainer.h"
// NOTE(review): the four directives below have no header name; the names
// appear to have been lost when the file was extracted. Restore them from the
// upstream sources before building.
#include

// From the STL library:
#include
#include
#include

namespace bpp
{

/**
 * @brief The VectorSiteContainer class.
 *
 * Sites are stored in a std::vector of pointers.
 * Site access is hence in \f$O(1)\f$, and sequence access in \f$O(l)\f$, where
 * \f$l\f$ is the number of sites in the container.
 *
 * See AlignedSequenceContainer for an alternative implementation.
 *
 * @see Sequence, Site, AlignedSequenceContainer
 */
class VectorSiteContainer :
  public AbstractSequenceContainer,   // This container implements the SequenceContainer interface
                                      // and uses the AbstractSequenceContainer adapter.
  public virtual SiteContainer        // This container is a SiteContainer.
{
  protected:
    // NOTE(review): the template arguments of the four vectors below (and of
    // the std::vector parameters/return types further down) are missing,
    // apparently lost in extraction - confirm the element types against the
    // upstream sources.
    std::vector sites_;             // The sites (one entry per site).
    std::vector names_;             // Sequence names; its size is the number of sequences.
    std::vector comments_;          // Sequences comments.
    mutable std::vector sequences_; // To store pointers toward retrieved sequences (cf. AlignedSequenceContainer).

  public:
    /**
     * @brief Build a new container from a set of sites.
     *
     * @param vs A std::vector of sites.
     * @param alpha The common alphabet for all sites.
     * @param checkPositions Check for the redundancy of site position tag. This may turn to be very time consuming!
     * @throw Exception If sites differ in size or in alphabet.
     */
    VectorSiteContainer(const std::vector& vs, const Alphabet* alpha, bool checkPositions = true) throw (Exception);

    /**
     * @brief Build a new empty container with specified size.
     *
     * @param size Number of sequences in the container.
     * @param alpha The alphabet for this container.
     */
    VectorSiteContainer(size_t size, const Alphabet* alpha);

    /**
     * @brief Build a new empty container with specified sequence names.
     *
     * @param names Sequence names. This will set the number of sequences in the container.
     * @param alpha The alphabet for this container.
     */
    VectorSiteContainer(const std::vector& names, const Alphabet* alpha);

    /**
     * @brief Build a new empty container.
     *
     * @param alpha The alphabet for this container.
     */
    VectorSiteContainer(const Alphabet* alpha);

    // Copy constructors, from this class and from the container interfaces:
    VectorSiteContainer(const VectorSiteContainer& vsc);
    VectorSiteContainer(const SiteContainer& sc);
    VectorSiteContainer(const OrderedSequenceContainer& osc);
    VectorSiteContainer(const SequenceContainer& sc);

    // Assignment operators, from this class and from the container interfaces:
    VectorSiteContainer& operator=(const VectorSiteContainer& vsc);
    VectorSiteContainer& operator=(const SiteContainer& sc);
    VectorSiteContainer& operator=(const OrderedSequenceContainer& osc);
    VectorSiteContainer& operator=(const SequenceContainer& sc);

    // The destructor releases the content through clear().
    virtual ~VectorSiteContainer() { clear(); }

  public:
    /**
     * @name The Clonable interface.
     *
     * @{
     */
    // Returns a heap-allocated copy of this container (copy constructor).
    VectorSiteContainer* clone() const { return new VectorSiteContainer(*this); }
    /** @} */

    /**
     * @name The SiteContainer interface implementation:
     *
     * @{
     */
    const Site& getSite(size_t siteIndex) const throw (IndexOutOfBoundsException);
    void setSite(size_t siteIndex, const Site& site, bool checkPosition = true) throw (Exception);
    Site* removeSite(size_t siteIndex) throw (IndexOutOfBoundsException);
    void deleteSite(size_t siteIndex) throw (IndexOutOfBoundsException);
    void deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException);
    void addSite(const Site& site, bool checkPosition = true) throw (Exception);
    void addSite(const Site& site, int position, bool checkPosition = true) throw (Exception);
    void addSite(const Site& site, size_t siteIndex, bool checkPosition = true) throw (Exception);
    void addSite(const Site& site, size_t siteIndex, int position, bool checkPosition = true) throw (Exception);
    size_t getNumberOfSites() const;
    void reindexSites();
    Vint getSitePositions() const;
    /** @} */

    // These methods are implemented for this class:

    /**
     * @name The SequenceContainer interface.
     *
     * @{
     */
    void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException);

    // Methods to get a sequence object from the sequence container
    const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException);
    const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException);
    bool hasSequence(const std::string& name) const;

    // Method to get the position of a sequence in the sequence container from its name.
    // This method is used by the delete and remove methods.
    size_t getSequencePosition(const std::string& name) const throw (SequenceNotFoundException);

    Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
    Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException);

    void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
    void deleteSequence(const std::string& name) throw (SequenceNotFoundException);

    // The number of sequences is the number of stored names.
    size_t getNumberOfSequences() const { return names_.size(); }

    std::vector getSequencesNames() const;

    void setSequencesNames(const std::vector& names, bool checkNames = true) throw (Exception);

    void clear();

    VectorSiteContainer* createEmptyContainer() const;

    // Element access: the element of a sequence at a given site is read from
    // the site object, i.e. (*sites_[elementIndex])[sequence index].
    // valueAt() validates the indices it receives and throws
    // IndexOutOfBoundsException; operator() performs no index check in this
    // class. Name-based versions resolve the name with getSequencePosition().
    int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException)
    {
      if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(std::string, size_t).", elementIndex, 0, getNumberOfSites() - 1);
      return (*sites_[elementIndex])[getSequencePosition(sequenceName)];
    }
    const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException)
    {
      if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(std::string, size_t).", elementIndex, 0, getNumberOfSites() - 1);
      return (*sites_[elementIndex])[getSequencePosition(sequenceName)];
    }
    int& operator()(const std::string& sequenceName, size_t elementIndex)
    {
      return (*sites_[elementIndex])[getSequencePosition(sequenceName)];
    }
    const int& operator()(const std::string& sequenceName, size_t elementIndex) const
    {
      return (*sites_[elementIndex])[getSequencePosition(sequenceName)];
    }

    int& valueAt(size_t sequenceIndex, size_t elementIndex) throw (IndexOutOfBoundsException)
    {
      if (sequenceIndex >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(size_t, size_t).", sequenceIndex, 0, getNumberOfSequences() - 1);
      if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(size_t, size_t).", elementIndex, 0, getNumberOfSites() - 1);
      return (*sites_[elementIndex])[sequenceIndex];
    }
    const int& valueAt(size_t sequenceIndex, size_t elementIndex) const throw (IndexOutOfBoundsException)
    {
      if (sequenceIndex >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(size_t, size_t).", sequenceIndex, 0, getNumberOfSequences() - 1);
      if (elementIndex >= getNumberOfSites()) throw IndexOutOfBoundsException("VectorSiteContainer::valueAt(size_t, size_t).", elementIndex, 0, getNumberOfSites() - 1);
      return (*sites_[elementIndex])[sequenceIndex];
    }
    int& operator()(size_t sequenceIndex, size_t elementIndex)
    {
      return (*sites_[elementIndex])[sequenceIndex];
    }
    const int& operator()(size_t sequenceIndex, size_t elementIndex) const
    {
      return (*sites_[elementIndex])[sequenceIndex];
    }
    /** @} */

    void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception);
    void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception);

    // Note: unlike addSequence(), these two declarations give no default
    // value to checkName.
    void setSequence(const std::string& name, const Sequence& sequence, bool checkName) throw (Exception);
    void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName) throw (Exception);

  protected:
    // Create n void sites:
    void realloc(size_t n);
};

} // end of namespace bpp.

#endif // _VECTORSITECONTAINER_H_

bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerIterator.h000644 000000 000000 00000007454 12147656566 023507 0ustar00rootroot000000 000000 
//
// File: SiteContainerIterator.h
// Created by: Julien Dutheil
// Created on: Sun Oct 19 12:47:16 2003
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/ #ifndef _SITECONTAINERITERATOR_H_ #define _SITECONTAINERITERATOR_H_ #include "../Site.h" #include "../SiteIterator.h" #include "SiteContainer.h" namespace bpp { /** * @brief Partial implementation of the SiteIterator interface, allowing to loop over a site container. */ class AbstractSiteContainerIterator : public virtual ConstSiteIterator { protected: const SiteContainer* sites_; int currentPosition_; public: AbstractSiteContainerIterator(const SiteContainer& sites); AbstractSiteContainerIterator(const AbstractSiteContainerIterator& asi) : sites_(asi.sites_), currentPosition_(asi.currentPosition_) {} AbstractSiteContainerIterator& operator=(const AbstractSiteContainerIterator& asi) { sites_ = asi.sites_; currentPosition_ = asi.currentPosition_; return *this; } virtual ~AbstractSiteContainerIterator() {} }; /** * @brief Loop over all sites in a SiteContainer. */ class SimpleSiteContainerIterator: public AbstractSiteContainerIterator { public: SimpleSiteContainerIterator(const SiteContainer& sites); virtual ~SimpleSiteContainerIterator() {} public: const Site* nextSite(); bool hasMoreSites() const; }; /** * @brief Loop over all sites without gaps in a SiteContainer. */ class NoGapSiteContainerIterator: public AbstractSiteContainerIterator { public: NoGapSiteContainerIterator(const SiteContainer & sites); virtual ~NoGapSiteContainerIterator() {} public: const Site* nextSite(); bool hasMoreSites() const; int nextSiteWithoutGapPosition(int current) const; int previousSiteWithoutGapPosition(int current) const; }; /** * @brief Loop over all complete sites in a SiteContainer * (i.e. sites without gap and unresolved characters). 
*/ class CompleteSiteContainerIterator: public AbstractSiteContainerIterator { public: CompleteSiteContainerIterator(const SiteContainer & sites); virtual ~CompleteSiteContainerIterator() {} public: const Site * nextSite(); bool hasMoreSites() const; int nextCompleteSitePosition(int current) const; int previousCompleteSitePosition(int current) const; }; } //end of namespace bpp. #endif //_SITEITERATOR_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerExceptions.h000644 000000 000000 00000005600 12147656566 024026 0ustar00rootroot000000 000000 // // File SiteContainerExceptions.h // Author: Julien Dutheil // Created on: mer mar 31 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SITECONTAINEREXCEPTIONS_H_ #define _SITECONTAINEREXCEPTIONS_H_ // From STL #include #include namespace bpp { /** * @brief The site not found exception base class. * * @see Exception */ class SiteNotFoundException: public Exception { protected: /** * @brief The id of the site that was to be found. */ const std::string id; public: // Class constructor /** * @brief Build a new SiteNotFoundException object. * * @param text A message to be passed to the exception hierarchy. * @param sId A the id of the site that was to be found. */ SiteNotFoundException(const char * text, const char * sId = ""); /** * @brief Build a new SiteNotFoundException object. * * @param text A message to be passed to the exception hierarchy. * @param sId A the id of the site that was to be found. */ SiteNotFoundException(const std::string & text, const std::string & sId = ""); // Class destructor ~SiteNotFoundException() throw(); public: /** * @brief Get the id of the site that was to be found. * * @return The id of the site that was to be found. */ virtual const std::string getSiteId() const; }; } //end of namespace bpp. #endif // _SITECONTAINEREXCEPTIONS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/AbstractSequenceContainer.h000644 000000 000000 00000012643 12147656566 024321 0ustar00rootroot000000 000000 // // File AbstractSequenceContainer.h // Created by: Guillaume Deuchst // Julien Dutheil // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTSEQUENCECONTAINER_H_ #define _ABSTRACTSEQUENCECONTAINER_H_ #include "../Alphabet/Alphabet.h" #include "../Sequence.h" #include "SequenceContainer.h" #include "OrderedSequenceContainer.h" #include namespace bpp { /** * @brief Partial implementation of the OrderedSequenceContainer interface. * * This abstract class provides an alphabet and comments, with associated methods. */ class AbstractSequenceContainer: public virtual OrderedSequenceContainer { private: /** * @brief The container's alphabet. */ const Alphabet* alphabet_; /** * @brief The container's comments. 
*/ Comments comments_; public: /** * @brief This constructor initialize the alphabet pointer. * * This constructor is to be called by constructors of derived classes. * * @param alpha The alphabet to be associated to this container. */ AbstractSequenceContainer(const Alphabet* alpha): alphabet_(alpha), comments_() {} AbstractSequenceContainer(const AbstractSequenceContainer& sc): alphabet_(sc.alphabet_), comments_(sc.comments_) {} AbstractSequenceContainer& operator=(const AbstractSequenceContainer& sc) { alphabet_ = sc.alphabet_; comments_ = sc.comments_; return *this; } /** * @brief Copy constructor from any SequenceContainer object. * * @param sc Another sequence container. */ AbstractSequenceContainer(const SequenceContainer& sc): alphabet_(sc.getAlphabet()), comments_(sc.getGeneralComments()) {} /** * @brief Assignation operator from any SequenceContainer object. * * @param sc Another sequence container. */ AbstractSequenceContainer& operator=(const SequenceContainer& sc) { alphabet_ = sc.getAlphabet(); comments_ = sc.getGeneralComments(); return *this; } virtual ~AbstractSequenceContainer() {} public: /** * @name From the SequenceContainer interface * * @{ */ const Alphabet* getAlphabet() const { return alphabet_; } const std::vector& getContent(const std::string& name) const throw (SequenceNotFoundException) { return getSequence(name).getContent(); } std::string toString(const std::string& name) const throw (SequenceNotFoundException) { return getSequence(name).toString(); } const Comments& getComments(const std::string& name) const throw (SequenceNotFoundException) { return getSequence(name).getComments(); } void setComments(const std::string& name, const Comments& comments) throw (SequenceNotFoundException); const Comments& getGeneralComments() const { return comments_; } void setGeneralComments(const Comments& comments) { comments_ = comments; } void deleteGeneralComments() { comments_.clear(); } /** @} */ /** * @name From the OrderedSequenceContainer 
interface * * @{ */ virtual const std::string& getName(size_t sequenceIndex) const throw (IndexOutOfBoundsException) { return getSequence(sequenceIndex).getName(); } virtual const std::vector& getContent(size_t sequenceIndex) const throw (IndexOutOfBoundsException) { return getSequence(sequenceIndex).getContent(); } virtual std::string toString(size_t sequenceIndex) const throw (IndexOutOfBoundsException) { return getSequence(sequenceIndex).toString(); } virtual const Comments& getComments(size_t sequenceIndex) const throw (IndexOutOfBoundsException) { return getSequence(sequenceIndex).getComments(); } virtual void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException) = 0; /** @} */ }; } //end of namespace bpp. #endif // _ABSTRACTSEQUENCECONTAINER_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerExceptions.cpp000644 000000 000000 00000004646 12147656566 024372 0ustar00rootroot000000 000000 // // File SiteContainerExceptions.cpp // Author: Julien Dutheil // Created on: mer mar 31 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SiteContainerExceptions.h" using namespace bpp; using namespace std; /******************************************************************************* * Site containers exceptions * *******************************************************************************/ SiteNotFoundException::SiteNotFoundException(const char * text, const char * sId) : Exception("SequenceNotFoundException: " + string(text) + "(" + sId + ")"), id(sId) {}; SiteNotFoundException::SiteNotFoundException(const string & text, const string & sId) : Exception("SequenceNotFoundException: " + text + "(" + sId + ")"), id(sId) {}; SiteNotFoundException::~SiteNotFoundException() throw() {}; const string SiteNotFoundException::getSiteId() const { return id; } bpp-seq-2.1.0/src/Bpp/Seq/Container/MapSequenceContainer.h000644 000000 000000 00000021003 12147656566 023261 0ustar00rootroot000000 000000 // // File MapSequenceContainer.h // Authors : Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Last modification : Friday June 25 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _MAPSEQUENCECONTAINER_H_ #define _MAPSEQUENCECONTAINER_H_ #include "../Alphabet/Alphabet.h" #include "../Sequence.h" #include "AbstractSequenceContainer.h" #include #include namespace bpp { /** * @brief MapSequenceContainer class * * Sequences are stored using a key std::string, in a map object. * Sequences are ordered according to the key order (defined by the < operator). 
* */ class MapSequenceContainer: public AbstractSequenceContainer { private: std::map sequences_; public: MapSequenceContainer(const std::map& ms, const Alphabet* alpha); MapSequenceContainer(const Alphabet* alpha): AbstractSequenceContainer(alpha), sequences_() {} MapSequenceContainer(const MapSequenceContainer& msc); MapSequenceContainer& operator=(const MapSequenceContainer& msc); virtual ~MapSequenceContainer(); public: /** * @brief Get a sequence. * * @param key The key of the sequence to retrieve. * @return The sequence associated to the given key. * @throw SequenceNotFoundException If no sequence is associated to the given key. */ const Sequence& getSequenceByKey(const std::string& key) const throw (SequenceNotFoundException); /** * @brief Set a sequence. * * @param key The key of the sequence. * @param sequence The new sequence that will be associated to the key. * @param checkNames Tell is the sequence name must be checked. * @throw SequenceNotFoundException If no sequence is associated to the given key. */ void setSequenceByKey(const std::string& key , const Sequence& sequence, bool checkNames = true) throw (SequenceNotFoundException); /** * @brief Remove a sequence. * * @param key The key of the sequence. * @return The sequence previously associated to the given key. * @throw SequenceNotFoundException If no sequence is associated to the given key. */ Sequence* removeSequenceByKey(const std::string& key) throw (SequenceNotFoundException); /** * @brief Delete a sequence. * * @param key The key of the sequence. * @throw SequenceNotFoundException If no sequence is associated to the given key. */ void deleteSequenceByKey(const std::string& key) throw (SequenceNotFoundException); /** * @brief Add a sequence and key. * * @param key The key of the new sequence. * @param sequence The new sequence that will be associated to the key. * @param checkNames Tell is the sequence name must be checked. 
*/ void addSequence(const std::string& key, const Sequence& sequence, bool checkNames = true) throw (Exception); /** * @return All sequences keys. */ std::vector getKeys() const; /** * @return The key of a given sequence specified by its position in the container. * @param pos The index of the sequence. * @throw IndexOutOfBoundsException If pos is not a valid index. */ std::string getKey(size_t pos) const throw (IndexOutOfBoundsException); /** * @return The key of a given sequence specified by its name. * @param name The name of the sequence. * @throw SequenceNotFoundException If no sequence was found with the given name. */ std::string getKey(const std::string& name) const throw (SequenceNotFoundException); /** * @name The clonable interface * * @{ */ MapSequenceContainer* clone() const { return new MapSequenceContainer(*this); } /** * @} */ /** * @name The SequenceContainer interface implementation: * * @{ */ const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException); bool hasSequence(const std::string& name) const; /** * @brief The SequenceContainer method. Calls the addSeqeucne(key, Sequence) method while using the resut of sequence.getName() as a key. 
*/ void addSequence(const Sequence& sequence, bool checkNames = true) throw (Exception) { addSequence(sequence.getName(), sequence, checkNames); } void setSequence(const std::string& name, const Sequence& sequence, bool checkName = true) throw (SequenceNotFoundException); Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException); void deleteSequence(const std::string& name) throw (SequenceNotFoundException); size_t getNumberOfSequences() const { return sequences_.size(); } void clear(); MapSequenceContainer* createEmptyContainer() const; int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException) { return getSequence_(sequenceName)[elementIndex]; } const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException) { return getSequence(sequenceName)[elementIndex]; } int& operator()(const std::string& sequenceName, size_t elementIndex) { return getSequence_(sequenceName)[elementIndex]; } const int& operator()(const std::string & sequenceName, size_t elementIndex) const { return getSequence(sequenceName)[elementIndex]; } int& valueAt(size_t sequenceIndex, size_t elementIndex) throw (IndexOutOfBoundsException) { return getSequence_(sequenceIndex)[elementIndex]; } const int& valueAt(size_t sequenceIndex, size_t elementIndex) const throw (IndexOutOfBoundsException) { return getSequence(sequenceIndex)[elementIndex]; } int& operator()(size_t sequenceIndex, size_t elementIndex) { return getSequence_(sequenceIndex)[elementIndex]; } const int& operator()(size_t sequenceIndex, size_t elementIndex) const { return getSequence(sequenceIndex)[elementIndex]; } /** @} */ /** * @name The OrderedSequenceContainer interface implementation: * * @{ */ const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException); size_t getSequencePosition(const std::string& name) const throw 
(SequenceNotFoundException); void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName = true) throw (IndexOutOfBoundsException); Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException); void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException); void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException); std::vector getSequencesNames() const; void setSequencesNames(const std::vector& names, bool checkNames) throw (Exception); /** @} */ /** * @name AbstractSequenceContainer methods. * * @{ */ Sequence& getSequence_(size_t i) throw (IndexOutOfBoundsException); Sequence& getSequence_(const std::string& name) throw (SequenceNotFoundException); /** @} */ }; } //end of namespace bpp. #endif // _MAPSEQUENCECONTAINER_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/SequenceContainer.h000644 000000 000000 00000027026 12147656566 022636 0ustar00rootroot000000 000000 // // File: SequenceContainer.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Fri Jul 25 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCECONTAINER_H_ #define _SEQUENCECONTAINER_H_ #include "../Alphabet/Alphabet.h" #include "../Sequence.h" #include "SequenceContainerExceptions.h" #include // From the STL: #include namespace bpp { /** * @brief The SequenceContainer interface. * * This interface is the most general one in the container hierarchy. * No assumption is made on the sequences in the container (no ordering, no alignment). * Sequences may be retrieved using their names, which must be unique. * * The container is the only one responsible for the allocation/destruction of sequences it * contains. This means that any sequence passed to it will be copied into the container. * The container also provides methods that send const pointers towards these sequences * (without performing any copy of the underlying objects). * * Notes : * 1. methods for adding sequences to the container are not declared here * (so they can't be used throught this interface), * because these methods take sequence container's type specific parameters * (i.e. a key for map sequence containers); * 2. to delete a sequence from a container, one must use the appropriate method * (removeSequence() and deleteSequence()). 
* These methods performs a few check, and properly update pointers. * You should never delete a sequence from a container by yourself. * * @see Sequence */ class SequenceContainer: public virtual Clonable { public: SequenceContainer() {} virtual ~SequenceContainer() {} public: /** * @brief Get sequence container's alphabet. * * @return The alphabet associated to this container. */ virtual const Alphabet* getAlphabet() const = 0; /** * @brief Get the content of a sequence. * * @param name The name of the sequence. * @return The content of the sequence as a vector of integers. * @throw SequenceNotFoundException If the name does not match any sequence in the container. */ virtual const std::vector& getContent(const std::string& name) const throw (SequenceNotFoundException) = 0; /** * @brief Convert a particular sequence to a string. * * @param name The name of the sequence. * @return A string describing the content of the sequence. * @throw SequenceNotFoundException If the name does not match any sequence in the container. */ virtual std::string toString(const std::string& name) const throw (SequenceNotFoundException) = 0; /** * @brief Retrieve a sequence object from the container. * * @param name The name of the sequence. * @return A reference toward the Sequence with corresponding name. * @throw SequenceNotFoundException If the name does not match any sequence in the container. */ virtual const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException) = 0; /** * @brief Check if a sequence with a given name is present in the container. * * @param name The name of the sequence. * @return True if a sequence with the given name is present in the container. */ virtual bool hasSequence(const std::string& name) const = 0; /** * @brief Add a sequence to the container. * * @param sequence The sequence to add. * @param checkName Tell if the container must check if the name of the sequence * is already used in the container before adding it. 
* @throw Exception Any other kind of exception, if the name of the sequence is * already used, are whatever else depending on the implementation. */ virtual void addSequence(const Sequence& sequence, bool checkName) throw (Exception) = 0; /** * @brief Replace a sequence in the container. * * @param name The name of the sequence. * @param sequence The sequence to add. * @param checkName Tell if the container must check if the name of the sequence * is already used in the container before adding it. * @throw SequenceNotFoundException If the name does not match any sequence in * the container. * @throw Exception Any other kind of exception, if the name of the sequence is * already used, are whatever else depending on the implementation. */ virtual void setSequence(const std::string& name, const Sequence& sequence, bool checkName) throw (Exception) = 0; /** * @brief Extract (and remove) a sequence from the container. * * @param name The name of the sequence. * @throw SequenceNotFoundException If the name does not match any sequence in * the container. */ virtual Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException, Exception) = 0; /** * @brief Delete a sequence of the container. * * @param name The name of the sequence. * @throw SequenceNotFoundException If the name does not match any sequence in * the container. */ virtual void deleteSequence(const std::string& name) throw (SequenceNotFoundException, Exception) = 0; /** * @brief Get the number of sequences in the container. * * @return The number of sequences in the container. */ virtual size_t getNumberOfSequences() const = 0; /** * @brief Get all the names of the sequences in the container. * * @return A vector of strings with all sequence names. */ virtual std::vector getSequencesNames() const = 0; /** * @brief Set all sequence names. * * @param names A vector of strings with all sequence names. 
* Its size must be strictly equal to the the size of the container (the number of * sequences). * @param checkNames Tell if the container must check if the name of the sequence * is already used in the container before adding it. * @throw Exception If there are redundant names in the input vector. */ virtual void setSequencesNames(const std::vector& names, bool checkNames) throw (Exception) = 0; /** * @brief Get comments of a particular sequence. * * @param name The name of the sequence. * @return The comments associated to sequence with name 'name'. * @throw SequenceNotFoundException If the name does not match any sequence in * the container. */ virtual const Comments& getComments(const std::string& name) const throw (SequenceNotFoundException) = 0; /** * @brief Set the comments of a particular sequence. * * @param name The name of the sequence. * @param comments The comments to set to sequence with name 'name'. * @throw SequenceNotFoundException If the name does not match any sequence in * the container. */ virtual void setComments(const std::string& name, const Comments& comments) throw (SequenceNotFoundException) = 0; /** * @brief Get the comments of this container. * * @return The comments associated to this container. */ virtual const Comments& getGeneralComments() const = 0; /** * @brief Set the comments of this container. * * @param comments The comments to be associated to this container. */ virtual void setGeneralComments(const Comments& comments) = 0; /** * @brief Delete the comments associated to this container. */ virtual void deleteGeneralComments() = 0; /** * @brief Delete all sequences in the container. */ virtual void clear() = 0; /** * @brief Return a copy of this container, but with no sequence inside. * * This method creates a new SequenceContainer objet. * The class of this container depends on the derivative class. * * @return A new empty container, with the same alphabet as this one. 
*/ virtual SequenceContainer* createEmptyContainer() const = 0; /** * @name Provide direct access to sequences content. * * @warning These operators allow you to modifiy the content of the sequences. * No checking is performed for your modifications, so use with care, or * consider using the setContent() methods. * * @{ */ /** * @brief Element access function. * * Allows direct access to the data stored in the container. * * @param sequenceName The sequence name. * @param elementIndex The element position within the sequence. * @throw SequenceNotFoundException If no corresponding sequence is found in the container. * @throw IndexOutOfBoundsException If the element position is not valid. */ virtual int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException) = 0; /** * @brief Element access function. * * Allows direct access to the data stored in the container. * * @param sequenceName The sequence name. * @param elementIndex The element position within the sequence. * @throw SequenceNotFoundException If no corresponding sequence is found in the container. * @throw IndexOutOfBoundsException If the element position is not valid. */ virtual const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException) = 0; /** * @brief Element access operator. * * Allows direct access to the data stored in the container. * This method is faster then the valueAt function, but input * parameters are not checked! * * @param sequenceName The sequence name. * @param elementIndex The element position within the sequence. */ virtual int& operator()(const std::string& sequenceName, size_t elementIndex) = 0; /** * @brief Element access operator. * * Allows direct access to the data stored in the container. * This method is faster then the valueAt function, but input * parameters are not checked! * * @param sequenceName The sequence name. 
* @param elementIndex The element position within the sequence. */ virtual const int& operator()(const std::string& sequenceName, size_t elementIndex) const = 0; /** @} */ }; } //end of namespace bpp. #endif // _SEQUENCECONTAINER_H_ bpp-seq-2.1.0/src/Bpp/Seq/Container/SiteContainerIterator.cpp000644 000000 000000 00000011217 12147656566 024032 0ustar00rootroot000000 000000 // // File: SiteContainerIterator.cpp // Created by: Julien Dutheil // Created on: Sun Oct 19 12:47:16 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SiteContainerIterator.h" #include "../SiteTools.h" using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ AbstractSiteContainerIterator::AbstractSiteContainerIterator(const SiteContainer& sites) : sites_(&sites), currentPosition_(0) {} /******************************************************************************/ SimpleSiteContainerIterator::SimpleSiteContainerIterator(const SiteContainer& sites): AbstractSiteContainerIterator(sites) {} const Site* SimpleSiteContainerIterator::nextSite() { const Site* s = &sites_->getSite(currentPosition_); currentPosition_++; return s; } bool SimpleSiteContainerIterator::hasMoreSites() const { return currentPosition_ < static_cast(sites_->getNumberOfSites()); } /******************************************************************************/ NoGapSiteContainerIterator::NoGapSiteContainerIterator(const SiteContainer& sites): AbstractSiteContainerIterator(sites) { currentPosition_ = nextSiteWithoutGapPosition(-1); } const Site* NoGapSiteContainerIterator::nextSite() { const Site* s = &sites_->getSite(currentPosition_); currentPosition_ = nextSiteWithoutGapPosition(currentPosition_); return s; } bool NoGapSiteContainerIterator::hasMoreSites() const { return currentPosition_ < static_cast(sites_->getNumberOfSites()); } int NoGapSiteContainerIterator::nextSiteWithoutGapPosition(int current) const { unsigned int position = current + 1; while (position < sites_->getNumberOfSites() && SiteTools::hasGap(sites_->getSite(position))) position++; return position; } int NoGapSiteContainerIterator::previousSiteWithoutGapPosition(int current) const { int position = current - 1; while (position >= 0 && SiteTools::hasGap(sites_->getSite(position))) position --; return position; } 
/******************************************************************************/ CompleteSiteContainerIterator::CompleteSiteContainerIterator(const SiteContainer & sites): AbstractSiteContainerIterator(sites) { currentPosition_ = nextCompleteSitePosition(-1); } const Site* CompleteSiteContainerIterator::nextSite() { const Site* s = &sites_->getSite(currentPosition_); currentPosition_ = nextCompleteSitePosition(currentPosition_); return s; } bool CompleteSiteContainerIterator::hasMoreSites() const { return currentPosition_ < static_cast(sites_->getNumberOfSites()); } int CompleteSiteContainerIterator::nextCompleteSitePosition(int current) const { unsigned int position = current + 1; while (position < sites_->getNumberOfSites() && !SiteTools::isComplete(sites_->getSite(position))) position ++; return position; } int CompleteSiteContainerIterator::previousCompleteSitePosition(int current) const { int position = current - 1; while (position >= 0 && !SiteTools::isComplete(sites_->getSite(position))) position --; return position; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Container/VectorSequenceContainer.cpp000644 000000 000000 00000032553 12147656566 024355 0ustar00rootroot000000 000000 // // File VectorSequenceContainer.cpp // Author : Guillaume Deuchst // Julien Dutheil // Last modification : Wednesday July 30 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "VectorSequenceContainer.h" #include using namespace bpp; using namespace std; /** Class constructors: *******************************************************/ VectorSequenceContainer::VectorSequenceContainer( const std::vector& vs, const Alphabet* alpha) throw (AlphabetMismatchException) : AbstractSequenceContainer(alpha), sequences_() { for (std::vector::const_iterator i = vs.begin(); i < vs.end(); i++) { addSequence(**i); } } /** Copy constructors: ********************************************************/ VectorSequenceContainer::VectorSequenceContainer( const VectorSequenceContainer& vsc) : AbstractSequenceContainer(vsc), sequences_() { size_t max = vsc.getNumberOfSequences(); for (size_t i = 0; i < max; i++) { addSequence(vsc.getSequence(i), false); } } VectorSequenceContainer::VectorSequenceContainer( const OrderedSequenceContainer& osc) : AbstractSequenceContainer(osc.getAlphabet()), sequences_() { // Sequences insertion for (unsigned int i = 0; i < osc.getNumberOfSequences(); i++) { addSequence(osc.getSequence(i), false); } } VectorSequenceContainer::VectorSequenceContainer( const SequenceContainer& sc) : AbstractSequenceContainer(sc.getAlphabet()), sequences_() { // Sequences insertion std::vector names = sc.getSequencesNames(); for (unsigned int i = 0; i < names.size(); i++) { addSequence(sc.getSequence(names[i]), false); } setGeneralComments(sc.getGeneralComments()); } /** Assignation operator: *****************************************************/ VectorSequenceContainer& VectorSequenceContainer::operator=( const VectorSequenceContainer& vsc) { clear(); AbstractSequenceContainer::operator=(vsc); // Sequences insertion size_t max = vsc.getNumberOfSequences(); for (size_t i = 0; i < max; i++) { addSequence(vsc.getSequence(i), false); } return *this; } VectorSequenceContainer& VectorSequenceContainer::operator=( const OrderedSequenceContainer& osc) { clear(); AbstractSequenceContainer::operator=(osc); // Sequences insertion size_t max = 
osc.getNumberOfSequences(); for (unsigned int i = 0; i < max; i++) { addSequence(osc.getSequence(i), false); } return *this; } /******************************************************************************/ VectorSequenceContainer& VectorSequenceContainer::operator=( const SequenceContainer& sc) { clear(); AbstractSequenceContainer::operator=(sc); // Seq names: std::vector names = sc.getSequencesNames(); for (unsigned int i = 0; i < names.size(); i++) { addSequence(sc.getSequence(names[i]), false); } return *this; } /******************************************************************************/ const Sequence& VectorSequenceContainer::getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException) { // Specified sequence existence verification if (sequenceIndex < sequences_.size()) return *sequences_[sequenceIndex]; throw IndexOutOfBoundsException("VectorSequenceContainer::getSequence.", sequenceIndex, 0, sequences_.size() - 1); } /******************************************************************************/ bool VectorSequenceContainer::hasSequence(const string& name) const { // Specified sequence name research into all sequences for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == name) return true; } return false; } /******************************************************************************/ const Sequence& VectorSequenceContainer::getSequence(const string& name) const throw (SequenceNotFoundException) { // Specified sequence name research into all sequences for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == name) return *sequences_[i]; } throw SequenceNotFoundException("VectorSequenceContainer::getSequence : Specified sequence doesn't exist", name); } /******************************************************************************/ Sequence& VectorSequenceContainer::getSequence_(size_t sequenceIndex) throw (IndexOutOfBoundsException) { // Specified sequence existence verification if 
(sequenceIndex < sequences_.size()) return *sequences_[sequenceIndex]; throw IndexOutOfBoundsException("VectorSequenceContainer::getSequence.", sequenceIndex, 0, sequences_.size() - 1); } /******************************************************************************/ Sequence& VectorSequenceContainer::getSequence_(const string& name) throw (SequenceNotFoundException) { // Specified sequence name research into all sequences for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == name) return *sequences_[i]; } throw SequenceNotFoundException("VectorSequenceContainer::getSequence : Specified sequence doesn't exist", name); } /******************************************************************************/ size_t VectorSequenceContainer::getSequencePosition(const string& name) const throw (SequenceNotFoundException) { // Specified sequence name research into all sequences for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == name) return i; } throw SequenceNotFoundException("VectorSequenceContainer::getSequencePosition : Specified sequence doesn't exist", name); } /******************************************************************************/ void VectorSequenceContainer::setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName) throw (Exception) { // Sequence's name existence checking if (checkName) { // For all names in vector : throw exception if name already exists for (size_t j = 0; j < sequences_.size(); j++) { if (sequences_[j]->getName() == sequence.getName()) if (j != sequenceIndex) throw Exception("VectorSequenceContainer::setSequence : Sequence's name already exists in container"); } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // Delete old sequence delete sequences_[sequenceIndex]; // New sequence insertion in sequence container sequences_[sequenceIndex] = 
dynamic_cast(sequence.clone()); } else throw AlphabetMismatchException("VectorSequenceContainer::setSequence : Alphabets don't match", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ Sequence* VectorSequenceContainer::removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException) { // Copy sequence: if (sequenceIndex >= sequences_.size()) throw IndexOutOfBoundsException("VectorSequenceContainer::removeSequence.", sequenceIndex, 0, sequences_.size() - 1); Sequence* old = sequences_[sequenceIndex]; // Remove pointer toward old sequence: sequences_.erase(sequences_.begin() + sequenceIndex); // Send copy: return old; } /******************************************************************************/ void VectorSequenceContainer::deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException) { // Delete sequence if (sequenceIndex >= sequences_.size()) throw IndexOutOfBoundsException("VectorSequenceContainer::deleteSequence.", sequenceIndex, 0, sequences_.size() - 1); delete sequences_[sequenceIndex]; // Remove pointer toward old sequence: sequences_.erase(sequences_.begin() + sequenceIndex); } /******************************************************************************/ void VectorSequenceContainer::addSequence(const Sequence& sequence, bool checkName) throw (Exception) { // Sequence's name existence checking if (checkName) { // For all names in vector : throw exception if name already exists for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == sequence.getName()) throw Exception("VectorSequenceContainer::addSequence : Sequence '" + sequence.getName() + "' already exists in container"); } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // push_back(new Sequence(sequence.getName(), sequence.getContent(), alphabet)); 
sequences_.push_back(dynamic_cast(sequence.clone())); } else throw AlphabetMismatchException("VectorSequenceContainer::addSequence : Alphabets don't match", getAlphabet(), sequence.getAlphabet()); } void VectorSequenceContainer::addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName) throw (Exception) { // Sequence's name existence checking if (checkName) { // For all names in vector : throw exception if name already exists for (size_t i = 0; i < sequences_.size(); i++) { if (sequences_[i]->getName() == sequence.getName()) throw Exception("VectorSequenceContainer::addSequence : Sequence '" + sequence.getName() + "' already exists in container"); } } // New sequence's alphabet and sequence container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType()) { // insert(begin() + pos, new Sequence(sequence.getName(), sequence.getContent(), alphabet)); sequences_.insert(sequences_.begin() + sequenceIndex, dynamic_cast(sequence.clone())); } else throw AlphabetMismatchException("VectorSequenceContainer::addSequence : Alphabets don't match", getAlphabet(), sequence.getAlphabet()); } /******************************************************************************/ std::vector VectorSequenceContainer::getSequencesNames() const { std::vector names; for (size_t i = 0; i < sequences_.size(); i++) { names.push_back(sequences_[i]->getName()); } return names; } /******************************************************************************/ void VectorSequenceContainer::setSequencesNames( const std::vector& names, bool checkNames) throw (Exception) { if (names.size() != getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSequenceContainer::setSequenceNames : bad number of names", names.size(), getNumberOfSequences(), getNumberOfSequences()); if (checkNames) { for (size_t i = 0; i < names.size(); i++) { // For all names in vector : throw exception if name already exists for (size_t j = 0; j < 
i; j++) { if (names[j] == names[i]) throw Exception("VectorSiteContainer::setSequencesNames : Sequence's name already exists in container"); } } } for (size_t i = 0; i < names.size(); i++) { sequences_[i]->setName(names[i]); } } /******************************************************************************/ void VectorSequenceContainer::clear() { // Delete sequences for (size_t i = 0; i < sequences_.size(); i++) { delete sequences_[i]; } // Delete all sequence pointers sequences_.clear(); } /******************************************************************************/ void VectorSequenceContainer::setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException) { sequences_[sequenceIndex]->setComments(comments); } /******************************************************************************/ VectorSequenceContainer* VectorSequenceContainer::createEmptyContainer() const { VectorSequenceContainer* vsc = new VectorSequenceContainer(getAlphabet()); vsc->setGeneralComments(getGeneralComments()); return vsc; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithQuality.cpp000644 000000 000000 00000005527 12147656566 021613 0ustar00rootroot000000 000000 // // File: SequenceWithQuality.cpp // Author: Sylvain Gaillard // Created: 19/01/2010 16:09:02 // /* Copyright or © or Copr. Bio++ Development Team, (January 19, 2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/

#include "SequenceWithQuality.h"
// NOTE(review): the header names of the three bare #include directives below
// were lost when this text was extracted (everything between '<' and '>' was
// stripped). Restore them from the upstream file.
#include
using namespace bpp;
#include
#include
using namespace std;

// Name constant of the quality annotation, and the score given to positions
// for which no score is known.
const string SequenceQuality::QUALITY_SCORE = "Quality score";
const int SequenceQuality::DEFAULT_QUALITY_VALUE = 20;

/******************************************************************************/

/**
 * @brief Reset all scores after the content of the symbol list was replaced.
 *
 * The previous scores are dropped, and one DEFAULT_QUALITY_VALUE is stored
 * for each element of the symbol list carried by the event.
 *
 * @param event The edition event; only the size of its symbol list is read.
 */
void SequenceQuality::afterSequenceChanged(const SymbolListEditionEvent& event)
{
  qualScores_.clear();
  qualScores_.insert(qualScores_.begin(), event.getSymbolList()->size(), DEFAULT_QUALITY_VALUE);
}

/******************************************************************************/

/**
 * @brief Make room for the scores of newly inserted elements.
 *
 * event.getLength() scores equal to DEFAULT_QUALITY_VALUE are inserted at
 * event.getPosition(). The position is not checked against the current
 * number of scores.
 *
 * @param event The insertion event.
 */
void SequenceQuality::afterSequenceInserted(const SymbolListInsertionEvent& event)
{
  qualScores_.insert(qualScores_.begin() + event.getPosition(), event.getLength(), DEFAULT_QUALITY_VALUE);
}

/******************************************************************************/

/**
 * @brief Drop the scores of deleted elements.
 *
 * The event.getLength() scores starting at event.getPosition() are erased.
 * The range is not checked against the current number of scores.
 *
 * @param event The deletion event.
 */
void SequenceQuality::afterSequenceDeleted(const SymbolListDeletionEvent& event)
{
  qualScores_.erase(qualScores_.begin() + event.getPosition(), qualScores_.begin() + event.getPosition() + event.getLength());
}

/******************************************************************************/
bpp-seq-2.1.0/src/Bpp/Seq/Site.h000644 000000 000000 00000013341 12147656566 016200 0ustar00rootroot000000 000000
//
// File Site.h
// Created by: Guillaume Deuchst
//             Julien Dutheil
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SITE_H_ #define _SITE_H_ #include "SymbolList.h" #include "SiteExceptions.h" namespace bpp { /** * @brief The Site class. * * Define specific attributes and methods for sites manipulation. * It is very similar to the Sequence object (a site is a vertical sequence!), * and characters at each position are coded as integers. * Sites have a 'position' attribute. * This attribute stands for an indice in a an alignment, and may be used as a unique identifier, * in the same manner that names identify sequence objects. * But for now, we do not allow to construct a Site directly from a string. * This should not be a constraint, since you never read sites directly from a file. */ class Site: public BasicSymbolList { private: /** * @brief The position associated to this site. */ int position_; public: /** * @brief Build a new void Site object with the specified alphabet. 
* * @param alpha The alphabet to use. */ Site(const Alphabet* alpha); /** * @brief Build a new void Site object with the specified alphabet and position. * * @param alpha The alphabet to use. * @param position The position attribute for this site. */ Site(const Alphabet* alpha, int position); /** * @brief Build a new Site object with the specified alphabet. * The content of the site is initialized from a vector of characters. * * @param site The content of the site. * @param alpha The alphabet to use. * @throw BadCharException If the content does not match the specified alphabet. */ Site(const std::vector& site, const Alphabet* alpha) throw (BadCharException); /** * @brief Build a new Site object with the specified alphabet and position. * The content of the site is initialized from a vector of characters. * * @param site The content of the site. * @param alpha The alphabet to use. * @param position The position attribute for this site. * @throw BadCharException If the content does not match the specified alphabet. */ Site(const std::vector& site, const Alphabet* alpha, int position) throw (BadCharException); /** * @brief Build a new Site object with the specified alphabet. * The content of the site is initialized from a vector of integers. * * @param site The content of the site. * @param alpha The alphabet to use. * @throw BadIntException If the content does not match the specified alphabet. */ Site(const std::vector& site, const Alphabet* alpha) throw (BadIntException); /** * @brief Build a new Site object with the specified alphabet and position. * The content of the site is initialized from a vector of integers. * * @param site The content of the site. * @param alpha The alphabet to use. * @param position The position attribute for this site. * @throw BadIntException If the content does not match the specified alphabet. */ Site(const std::vector& site, const Alphabet* alpha, int position) throw (BadIntException); /** * @brief The copy constructor. 
*/ Site(const Site& site); /** * @brief The assignment operator. */ Site& operator=(const Site& s); virtual ~Site() {} public: /** * @name The Clonable interface * * @{ */ Site* clone() const { return new Site(*this); } /** @} */ /** * @name Setting/getting the position of the site. * * @{ */ /** * @brief Get the position of this site. * * @return This site position. */ virtual int getPosition() const { return position_; } /** * @brief Set the position of this site. * * @param position The new position of the site. */ virtual void setPosition(int position) { position_ = position; } }; // Sites comparison operators overload bool operator == (const Site& site1, const Site& site2); bool operator < (const Site& site1, const Site& site2); } //end of namespace bpp. #endif //_SITE_H_ bpp-seq-2.1.0/src/Bpp/Seq/NucleicAcidsReplication.h000644 000000 000000 00000007205 12147656566 022016 0ustar00rootroot000000 000000 // // File: NucleicAcidsReplication.h // Created by: Julien Dutheil // Created on: Fri May 20 14:20 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _NUCLEICACIDSREPLICATION_H_ #define _NUCLEICACIDSREPLICATION_H_ #include "Transliterator.h" #include "Alphabet/NucleicAlphabet.h" // From the STL: #include namespace bpp { /** * @brief Replication between to nucleic acids. * * Example of use: * - DNA -> DNA: DNA Replication * - RNA -> RNA: RNA Replication * - DNA -> RNA: Transcription * - RNA -> DNA: Reverse transcription * * Since this is an instance of the ReverseIterator interface, transcription and * reverse transcription may be achieved from the same instance of the object by * using the translate and reverse methods. 
*/ class NucleicAcidsReplication : public ReverseTransliterator { private: const NucleicAlphabet* nuc1_, * nuc2_; mutable std::map trans_; public: NucleicAcidsReplication(const NucleicAlphabet* nuc1, const NucleicAlphabet* nuc2); NucleicAcidsReplication(const NucleicAcidsReplication& nar): ReverseTransliterator(nar), nuc1_(nar.nuc1_), nuc2_(nar.nuc2_), trans_(nar.trans_) {} NucleicAcidsReplication& operator=(const NucleicAcidsReplication& nar) { ReverseTransliterator::operator=(nar); nuc1_ = nar.nuc1_; nuc2_ = nar.nuc2_; trans_ = nar.trans_; return *this; } virtual ~NucleicAcidsReplication() {} public: const Alphabet* getSourceAlphabet() const { return nuc1_; } const Alphabet* getTargetAlphabet() const { return nuc2_; } int translate(int state) const throw (BadIntException); std::string translate(const std::string& state) const throw (BadCharException); Sequence* translate(const Sequence& sequence) const throw (AlphabetMismatchException); int reverse(int state) const throw (BadIntException); std::string reverse(const std::string& state) const throw (BadCharException); Sequence* reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception); }; } //end of namespace bpp. #endif //_NUCLEICACIDSREPLICATION_H_ bpp-seq-2.1.0/src/Bpp/Seq/SymbolList.cpp000644 000000 000000 00000036534 12147656566 017741 0ustar00rootroot000000 000000 // // File: SymbolList.cpp // Created by: Julien Dutheil // Created on: Fri Apr 9 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "SymbolList.h" #include "StringSequenceTools.h" using namespace bpp; using namespace std; /****************************************************************************************/ BasicSymbolList::BasicSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadCharException) : alphabet_(alpha), content_() { setContent(list); } BasicSymbolList::BasicSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadIntException) : alphabet_(alpha), content_() { setContent(list); } /****************************************************************************************/ BasicSymbolList::BasicSymbolList(const SymbolList& list): alphabet_(list.getAlphabet()), content_(list.getContent()) {} BasicSymbolList::BasicSymbolList(const BasicSymbolList& list): alphabet_(list.getAlphabet()), content_(list.getContent()) {} BasicSymbolList& BasicSymbolList::operator=(const SymbolList& list) { content_ = list.getContent(); alphabet_ = list.getAlphabet(); return *this; } BasicSymbolList& BasicSymbolList::operator=(const BasicSymbolList& list) { content_ = list.getContent(); alphabet_ = list.getAlphabet(); return *this; } /****************************************************************************************/ void BasicSymbolList::setContent(const vector& list) throw (BadCharException) { // Check list for incorrect characters vector coded(list.size()); for (size_t i = 0; i < list.size(); i++) if(!alphabet_->isCharInAlphabet(list[i])) throw BadCharException(list[i], "BasicSymbolList::setContent", alphabet_); for (size_t i = 0; i < list.size(); i++) coded[i] = alphabet_->charToInt(list[i]); //BasicSymbolList is valid: content_ = coded; }; /****************************************************************************************/ void BasicSymbolList::setContent(const vector& list) throw (BadIntException) { // Check list for incorrect characters for (size_t i = 0; i < list.size(); i++) if(!alphabet_->isIntInAlphabet(list[i])) throw BadIntException(list[i], 
"BasicSymbolList::setContent", alphabet_); //Sequence is valid: content_ = list; }; /****************************************************************************************/ string BasicSymbolList::toString() const { return StringSequenceTools::decodeSequence(content_, alphabet_); }; /****************************************************************************************/ void BasicSymbolList::addElement(const string& c) throw (BadCharException) { content_.push_back(alphabet_->charToInt(c)); } /****************************************************************************************/ void BasicSymbolList::addElement(size_t pos, const string& c) throw (BadCharException, IndexOutOfBoundsException) { if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::addElement. Invalid position.", pos, 0, size() - 1); content_.insert(content_.begin() + pos, alphabet_->charToInt(c)); } /****************************************************************************************/ void BasicSymbolList::setElement(size_t pos, const string& c) throw (BadCharException, IndexOutOfBoundsException) { if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::setElement. Invalid position.", pos, 0, size() - 1); content_[pos] = alphabet_->charToInt(c); } /****************************************************************************************/ string BasicSymbolList::getChar(size_t pos) const throw (IndexOutOfBoundsException) { if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::getChar. Invalid position.", pos, 0, size() - 1); string c = ""; try { c = alphabet_->intToChar(content_[pos]); } catch(BadIntException bie) { //This should never happen! } return c; } /****************************************************************************************/ void BasicSymbolList::deleteElement(size_t pos) throw (IndexOutOfBoundsException) { if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::deleteElement. 
Invalid position.", pos, 0, size() - 1); content_.erase(content_.begin() + pos); } /****************************************************************************************/ void BasicSymbolList::deleteElements(size_t pos, size_t len) throw (IndexOutOfBoundsException) { if (pos + len > content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::deleteElements. Invalid position.", pos + len, 0, size() - 1); content_.erase(content_.begin() + pos, content_.begin() + pos + len); } /****************************************************************************************/ void BasicSymbolList::addElement(int v) throw (BadIntException) { //test: alphabet_->intToChar(v); content_.push_back(v); } /****************************************************************************************/ void BasicSymbolList::addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) { //test: if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::addElement. Invalid position.", pos, 0, size() - 1); alphabet_->intToChar(v); content_.insert(content_.begin() + pos, v); } /****************************************************************************************/ void BasicSymbolList::setElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) { //test: if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::setElement. Invalid position.", pos, 0, size() - 1); alphabet_->intToChar(v); content_[pos] = v; } /****************************************************************************************/ int BasicSymbolList::getValue(size_t pos) const throw (IndexOutOfBoundsException) { if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::getValue. 
Invalid position.", pos, 0, size() - 1); return content_[pos]; } /****************************************************************************************/ /****************************************************************************************/ EdSymbolList::EdSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadCharException) : alphabet_(alpha), propagateEvents_(true), content_(), listeners_() { setContent(list); } EdSymbolList::EdSymbolList(const std::vector& list, const Alphabet* alpha) throw (BadIntException) : alphabet_(alpha), propagateEvents_(true), content_(), listeners_() { setContent(list); } /****************************************************************************************/ EdSymbolList::EdSymbolList(const SymbolList& list): alphabet_(list.getAlphabet()), propagateEvents_(true), content_(list.getContent()), listeners_() {} EdSymbolList::EdSymbolList(const EdSymbolList& list): alphabet_(list.getAlphabet()), propagateEvents_(list.propagateEvents_), content_(list.getContent()), listeners_(list.listeners_) { for (size_t i = 0; i < listeners_.size(); ++i) if (!list.listeners_[i]->isShared()) listeners_[i] = dynamic_cast(list.listeners_[i]->clone()); } EdSymbolList& EdSymbolList::operator=(const SymbolList& list) { content_ = list.getContent(); alphabet_ = list.getAlphabet(); propagateEvents_ = true; for (size_t i = 0; i < listeners_.size(); ++i) if (!listeners_[i]->isShared()) delete listeners_[i]; listeners_.clear(); return *this; } EdSymbolList& EdSymbolList::operator=(const EdSymbolList& list) { content_ = list.getContent(); alphabet_ = list.getAlphabet(); propagateEvents_ = list.propagateEvents_; for (size_t i = 0; i < listeners_.size(); ++i) delete listeners_[i]; listeners_ = list.listeners_; for (size_t i = 0; i < listeners_.size(); ++i) if (!list.listeners_[i]->isShared()) listeners_[i] = dynamic_cast(list.listeners_[i]->clone()); return *this; } 
/****************************************************************************************/ void EdSymbolList::setContent(const vector& list) throw (BadCharException) { SymbolListEditionEvent event(this); fireBeforeSequenceChanged(event); // Check list for incorrect characters vector coded(list.size()); for (size_t i = 0; i < list.size(); i++) if (!alphabet_->isCharInAlphabet(list[i])) throw BadCharException(list[i], "EdSymbolList::setContent", alphabet_); for (size_t i = 0; i < list.size(); i++) coded[i] = alphabet_->charToInt(list[i]); //SymbolList is valid: content_ = coded; fireAfterSequenceChanged(event); }; /****************************************************************************************/ void EdSymbolList::setContent(const vector& list) throw (BadIntException) { SymbolListEditionEvent event(this); fireBeforeSequenceChanged(event); // Check list for incorrect characters for (size_t i = 0; i < list.size(); i++) if(!alphabet_->isIntInAlphabet(list[i])) throw BadIntException(list[i], "EdSymbolList::setContent", alphabet_); //Sequence is valid: content_ = list; fireAfterSequenceChanged(event); }; /****************************************************************************************/ string EdSymbolList::toString() const { return StringSequenceTools::decodeSequence(content_, alphabet_); }; /****************************************************************************************/ void EdSymbolList::addElement(const string& c) throw (BadCharException) { SymbolListInsertionEvent event(this, size(), 1); fireBeforeSequenceInserted(event); content_.push_back(alphabet_->charToInt(c)); fireAfterSequenceInserted(event); } /****************************************************************************************/ void EdSymbolList::addElement(size_t pos, const string& c) throw (BadCharException, IndexOutOfBoundsException) { if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::addElement. 
Invalid position.", pos, 0, size() - 1); SymbolListInsertionEvent event(this, pos, 1); fireBeforeSequenceInserted(event); content_.insert(content_.begin() + pos, alphabet_->charToInt(c)); fireAfterSequenceInserted(event); } /****************************************************************************************/ void EdSymbolList::setElement(size_t pos, const string& c) throw (BadCharException, IndexOutOfBoundsException) { if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::setElement. Invalid position.", pos, 0, size() - 1); SymbolListSubstitutionEvent event(this, pos, pos); fireBeforeSequenceSubstituted(event); content_[pos] = alphabet_->charToInt(c); fireAfterSequenceSubstituted(event); } /****************************************************************************************/ string EdSymbolList::getChar(size_t pos) const throw (IndexOutOfBoundsException) { if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::getChar. Invalid position.", pos, 0, size() - 1); string c = ""; try { c = alphabet_->intToChar(content_[pos]); } catch(BadIntException bie) { //This should never happen! } return c; } /****************************************************************************************/ void EdSymbolList::deleteElement(size_t pos) throw (IndexOutOfBoundsException) { if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::deleteElement. Invalid position.", pos, 0, size() - 1); SymbolListDeletionEvent event(this, pos, 1); fireBeforeSequenceDeleted(event); content_.erase(content_.begin() + pos); fireAfterSequenceDeleted(event); } /****************************************************************************************/ void EdSymbolList::deleteElements(size_t pos, size_t len) throw (IndexOutOfBoundsException) { if(pos + len > content_.size()) throw IndexOutOfBoundsException("EdSymbolList::deleteElements. 
Invalid position.", pos + len, 0, size() - 1); SymbolListDeletionEvent event(this, pos, len); fireBeforeSequenceDeleted(event); content_.erase(content_.begin() + pos, content_.begin() + pos + len); fireAfterSequenceDeleted(event); } /****************************************************************************************/ void EdSymbolList::addElement(int v) throw (BadIntException) { SymbolListInsertionEvent event(this, size(), 1); fireBeforeSequenceInserted(event); //test: alphabet_->intToChar(v); content_.push_back(v); fireAfterSequenceInserted(event); } /****************************************************************************************/ void EdSymbolList::addElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) { //test: if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::addElement. Invalid position.", pos, 0, size() - 1); SymbolListInsertionEvent event(this, pos, 1); fireBeforeSequenceInserted(event); alphabet_->intToChar(v); content_.insert(content_.begin() + pos, v); fireAfterSequenceInserted(event); } /****************************************************************************************/ void EdSymbolList::setElement(size_t pos, int v) throw (BadIntException, IndexOutOfBoundsException) { //test: if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::setElement. Invalid position.", pos, 0, size() - 1); SymbolListSubstitutionEvent event(this, pos, pos); fireBeforeSequenceSubstituted(event); alphabet_->intToChar(v); content_[pos] = v; fireAfterSequenceSubstituted(event); } /****************************************************************************************/ int EdSymbolList::getValue(size_t pos) const throw (IndexOutOfBoundsException) { if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::getValue. 
Invalid position.", pos, 0, size() - 1); return content_[pos]; } /****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/SiteIterator.h000644 000000 000000 00000004462 12147656566 017716 0ustar00rootroot000000 000000 // // File: SiteIterator.h // Created by: Julien Dutheil // Created on: Sun Oct 19 12:47:16 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _SITEITERATOR_H_ #define _SITEITERATOR_H_ #include "Site.h" namespace bpp { /** * @brief Generic site iterator interface, allowing to loop over sites. */ class SiteIterator { public: SiteIterator() {} virtual ~SiteIterator() {} public: virtual Site* nextSite() = 0; virtual bool hasMoreSites() const = 0; }; /** * @brief Generic const site iterator interface, allowing to loop over const sites. */ class ConstSiteIterator { public: ConstSiteIterator() {} virtual ~ConstSiteIterator() {} public: virtual const Site* nextSite() = 0; virtual bool hasMoreSites() const = 0; }; } //end of namespace bpp. #endif //_SITEITERATOR_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/GeneticCode.h000644 000000 000000 00000013623 12147656566 021621 0ustar00rootroot000000 000000 // // File: GeneticCode.h // Created by: Julien Dutheil // Created on: Mon Oct 13 15:37:25 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _GENETICCODE_H_ #define _GENETICCODE_H_ #include "../Transliterator.h" #include "../Alphabet/CodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" #include namespace bpp { /** * @brief Exception thrown when a stop codon is found. */ class StopCodonException: public Exception { private: std::string codon_; public: // Class constructor StopCodonException(const std::string& text, const std::string& codon); // Class destructor virtual ~StopCodonException() throw () {} public: virtual const std::string& getCodon() const { return codon_; } }; /** * @brief Partial implementation of the Transliterator interface for genetic code object. * * A genetic code object if a translator from a codon alphabet to a proteic alphabet. * Depending on the codon alphabet used, several genetic code can be implemented. 
* * @see CodonAlphabet, ProteicAlphabet */ class GeneticCode: public AbstractTransliterator { protected: const CodonAlphabet* codonAlphabet_; const ProteicAlphabet* proteicAlphabet_; public: GeneticCode(): AbstractTransliterator() , codonAlphabet_(0), proteicAlphabet_(0) {} GeneticCode(const GeneticCode& gc): AbstractTransliterator(gc), codonAlphabet_(gc.codonAlphabet_), proteicAlphabet_(gc.proteicAlphabet_) {} GeneticCode& operator=(const GeneticCode& gc) { AbstractTransliterator::operator=(gc); codonAlphabet_ = gc.codonAlphabet_; proteicAlphabet_ = gc.proteicAlphabet_; return *this; } virtual ~GeneticCode() {} public: /** * @name Methods form the Transliterator interface. * * @{ */ const Alphabet* getSourceAlphabet() const { return codonAlphabet_; } const Alphabet* getTargetAlphabet() const { return proteicAlphabet_; } virtual int translate(int state) const throw (BadIntException, Exception) = 0; virtual std::string translate(const std::string& state) const throw (BadCharException, Exception) = 0; virtual Sequence* translate(const Sequence& sequence) const throw (Exception) { return AbstractTransliterator::translate(sequence); } /** @} */ public: /** * @name Specific methods. * * @{ */ bool areSynonymous(int i, int j) const throw (BadIntException) { return (translate(i) == translate(j)); } bool areSynonymous(const std::string & i, const std::string & j) const throw (BadCharException) { return (translate(i) == translate(j)); } std::vector getSynonymous(int aminoacid) const throw (BadIntException); std::vector getSynonymous(const std::string & aminoacid) const throw (BadCharException); /** * @return True if the specified codon is fourfold degenerated * (that is, if a mutation in the fourth position does not change the aminoacid). * @author Benoit Nabholz, Annabelle Haudry * @param codon The codon to test. */ bool isFourFoldDegenerated(int codon) const; /** * @brief Get the subsequence corresponding to the coding part of a given sequence. 
* * If lookForInitCodon if set to 'true', the subsequence will start at the first AUG motif, * otherwise the subsequence will start at the begining of the sequence. * The subsequence ends at the first stop codon (excluded) found, or the end of the sequence. * * The sequence may have a nucleotide or codon alphabet. * The subsequence has the same alphabet, name and comments of the input sequence. * In case of nucleotide sequence and if the lookForInitCodon option is checked, the phase * will be determined from the sequence. * * @param sequence The sequence to parse. * @param lookForInitCodon Tell if the AUG codon must be found. * @param includeInitCodon (if lookForInitCodon is true) tell if the init codon must be included in the subsequence. * @return A nucleotide/codon subsequence. */ Sequence* getCodingSequence(const Sequence& sequence, bool lookForInitCodon = false, bool includeInitCodon = false) const throw (Exception); /** @} */ }; } //end of namespace bpp. #endif //_GENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/StandardGeneticCode.h000644 000000 000000 00000004664 12147656566 023307 0ustar00rootroot000000 000000 // // File: StandardGeneticCode.h // Created by: Julien Dutheil // Created on: Mon Oct 13 15:39:17 2003 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _STANDARDGENETICCODE_H_ #define _STANDARDGENETICCODE_H_ #include "GeneticCode.h" #include "../Alphabet/NucleicAlphabet.h" namespace bpp { /** * @brief This class implements the standard genetic code as describe on the NCBI * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG1 */ class StandardGeneticCode: public GeneticCode { public: StandardGeneticCode(const NucleicAlphabet * alpha); virtual ~StandardGeneticCode(); public: int translate(int state) const throw (Exception); std::string translate(const std::string & state) const throw (Exception); Sequence * translate(const Sequence & sequence) const throw (Exception) { return GeneticCode::translate(sequence); } }; } //end of namespace bpp. #endif //_STANDARDGENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h000644 000000 000000 00000005037 12147656566 025344 0ustar00rootroot000000 000000 // // File: YeastbrateMitochondrialGeneticCode.h // Created by: Benoit Nabholz // Created on: Sun Oct 10 14:33 CET 2010 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _YEASTMITOCHONDRIALGENETICCODE_H_ #define _YEASTMITOCHONDRIALGENETICCODE_H_ #include "GeneticCode.h" #include "../Alphabet/NucleicAlphabet.h" namespace bpp { /** * @brief This class implements the Invertebrate * Mitochondrial genetic code as describe on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG3 */ class YeastMitochondrialGeneticCode: public GeneticCode { public: YeastMitochondrialGeneticCode(const NucleicAlphabet * alpha); virtual ~YeastMitochondrialGeneticCode(); public: int translate(int state) const throw (Exception); std::string translate(const std::string & state) const throw (Exception); Sequence * translate(const Sequence & sequence) const throw (Exception) { return GeneticCode::translate(sequence); } }; } //end of namespace bpp. #endif //_YEASTMITOCHONDRIALGENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h000644 000000 000000 00000005051 12147656566 026356 0ustar00rootroot000000 000000 // // File: VertebrateMitochondrialGeneticCode.h // Created by: Eric Bazin // Created on: wen mar 2 16:01:59 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _VERTEBRATEMITOCHONDRIALGENETICCODE_H_ #define _VERTEBRATEMITOCHONDRIALGENETICCODE_H_ #include "GeneticCode.h" #include "../Alphabet/NucleicAlphabet.h" namespace bpp { /** * @brief This class implements the vertebrate mitochondrial genetic code as describe on the NCBI * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG2 */ class VertebrateMitochondrialGeneticCode: public GeneticCode { public: VertebrateMitochondrialGeneticCode(const NucleicAlphabet* alpha); virtual ~VertebrateMitochondrialGeneticCode(); public: int translate(int state) const throw (Exception); std::string translate(const std::string& state) const throw (Exception); Sequence* translate(const Sequence& sequence) const throw (Exception) { return GeneticCode::translate(sequence); } }; } //end of namespace bpp. #endif //_VERTEBRATEMITOCHONDRIALGENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp000644 000000 000000 00000013566 12147656566 027252 0ustar00rootroot000000 000000 // // File: InvertebrateMitochondrialGeneticCode.cpp // Created by: Eric Bazin // Created on: wen mar 2 15:22:46 CET 2005 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "InvertebrateMitochondrialGeneticCode.h" #include "../Alphabet/InvertebrateMitochondrialCodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" using namespace bpp; #include using namespace std; InvertebrateMitochondrialGeneticCode::InvertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode() { codonAlphabet_ = new InvertebrateMitochondrialCodonAlphabet(alpha); proteicAlphabet_ = new ProteicAlphabet(); } InvertebrateMitochondrialGeneticCode::~InvertebrateMitochondrialGeneticCode() { delete codonAlphabet_; delete proteicAlphabet_; } int InvertebrateMitochondrialGeneticCode::translate(int state) const throw (Exception) { if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode(); vector positions = codonAlphabet_->getPositions(state); switch(positions[0]) { //First position: case 0 : //A switch(positions[1]) { //Second position: case 0 : //AA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine } case 1 : //AC return proteicAlphabet_->charToInt("T"); //Threonine case 2 : //AG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("S"); //Serine case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine } case 3 : //AT switch(positions[2]) { //Third position: case 0 : case 2: return proteicAlphabet_->charToInt("M"); //Methionine case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine } } case 1 : //C switch(positions[1]) { //Second position: case 0 : //CA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine } case 1 : //CC return proteicAlphabet_->charToInt("P"); //Proline case 2 : //CG return proteicAlphabet_->charToInt("R"); //Arginine case 3 : //CT return 
proteicAlphabet_->charToInt("L"); //Leucine } case 2 : //G switch(positions[1]) { //Second position: case 0 : //GA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid } case 1 : //GC return proteicAlphabet_->charToInt("A"); //Alanine case 2 : //GG return proteicAlphabet_->charToInt("G"); //Glycine case 3 : //GT return proteicAlphabet_->charToInt("V"); //Valine } case 3 : //T(U) switch(positions[1]) { //Second position: case 0 : //TA switch(positions[2]) { //Third position: case 0 : throw StopCodonException("", "TAA"); //Stop codon case 2 : throw StopCodonException("", "TAG"); //Stop codon case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine } case 1 : //TC return proteicAlphabet_->charToInt("S"); //Serine case 2 : //TG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine } case 3 : //TT switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine } } } throw BadIntException(state, "InvertebrateMitochondrialGeneticCode::translate", codonAlphabet_); } string InvertebrateMitochondrialGeneticCode::translate(const string & state) const throw (Exception) { return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state))); } bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp000644 000000 000000 00000013461 12147656566 025677 0ustar00rootroot000000 000000 // // File: YeastbrateMitochondrialGeneticCode.cpp // Created by: Benoit Nabholz // Created on: Sun Oct 10 14:33 CET 2010 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "YeastMitochondrialGeneticCode.h" #include "../Alphabet/YeastMitochondrialCodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" using namespace bpp; #include using namespace std; YeastMitochondrialGeneticCode::YeastMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode() { codonAlphabet_ = new YeastMitochondrialCodonAlphabet(alpha); proteicAlphabet_ = new ProteicAlphabet(); } YeastMitochondrialGeneticCode::~YeastMitochondrialGeneticCode() { delete codonAlphabet_; delete proteicAlphabet_; } int YeastMitochondrialGeneticCode::translate(int state) const throw (Exception) { if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode(); vector positions = codonAlphabet_->getPositions(state); switch(positions[0]) { //First position: case 0 : //A switch(positions[1]) { //Second position: case 0 : //AA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine } case 1 : //AC return proteicAlphabet_->charToInt("T"); //Threonine case 2 : //AG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("S"); //Serine case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine } case 3 : //AT switch(positions[2]) { //Third position: case 0 : case 2: return proteicAlphabet_->charToInt("M"); //Methionine case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine } } case 1 : //C switch(positions[1]) { //Second position: case 0 : //CA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine } case 1 : //CC return proteicAlphabet_->charToInt("P"); //Proline case 2 : //CG return proteicAlphabet_->charToInt("R"); //Arginine case 3 : //CT return proteicAlphabet_->charToInt("T"); //Threonine } case 2 : //G 
switch(positions[1]) { //Second position: case 0 : //GA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid } case 1 : //GC return proteicAlphabet_->charToInt("A"); //Alanine case 2 : //GG return proteicAlphabet_->charToInt("G"); //Glycine case 3 : //GT return proteicAlphabet_->charToInt("V"); //Valine } case 3 : //T(U) switch(positions[1]) { //Second position: case 0 : //TA switch(positions[2]) { //Third position: case 0 : throw StopCodonException("", "TAA"); //Stop codon case 2 : throw StopCodonException("", "TAG"); //Stop codon case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine } case 1 : //TC return proteicAlphabet_->charToInt("S"); //Serine case 2 : //TG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine } case 3 : //TT switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine } } } throw BadIntException(state, "YeastMitochondrialGeneticCode::translate", codonAlphabet_); } string YeastMitochondrialGeneticCode::translate(const string & state) const throw (Exception) { return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state))); } bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/GeneticCode.cpp000644 000000 000000 00000013653 12147656566 022157 0ustar00rootroot000000 000000 // // File: GeneticCode.cpp // Created by: Julien Dutheil // Created on: Mon Oct 13 15:37:25 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "GeneticCode.h" #include "../SequenceTools.h" #include "../Alphabet/AlphabetTools.h" using namespace bpp; using namespace std; /**********************************************************************************************/ StopCodonException::StopCodonException(const std::string& text, const std::string& codon) : Exception("StopCodonException: " + text + "(" + codon + ")"), codon_(codon) {} /**********************************************************************************************/ vector GeneticCode::getSynonymous(int aminoacid) const throw (BadIntException) { // test: proteicAlphabet_->intToChar(aminoacid); vector synonymes; for (unsigned int i = 0; i < codonAlphabet_->getSize(); ++i) { try { if (translate(i) == aminoacid) synonymes.push_back(i); } catch (StopCodonException) { } } return synonymes; } /**********************************************************************************************/ std::vector GeneticCode::getSynonymous(const std::string& aminoacid) const throw (BadCharException) { // test: int aa = proteicAlphabet_->charToInt(aminoacid); vector synonymes; for (unsigned int i = 0; i < codonAlphabet_->getSize(); ++i) { try { if (translate(i) == aa) synonymes.push_back(codonAlphabet_->intToChar(i)); } catch (StopCodonException) { } } return synonymes; } /**********************************************************************************************/ bool GeneticCode::isFourFoldDegenerated(int val) const { if (codonAlphabet_->isStop(val)) return false; vector codon = codonAlphabet_->getPositions(val); int acid = translate(val); // test all the substitution on third codon position for (int an = 0; an < 4; an++) { if (an == codon[2]) continue; vector mutcodon = codon; mutcodon[2] = an; int intcodon = codonAlphabet_->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]); if (codonAlphabet_->isStop(intcodon)) return false; ; int altacid = translate(intcodon); if (altacid != acid) // if non-synonymous { return false; } } return true; } 
/**********************************************************************************************/ Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForInitCodon, bool includeInitCodon) const throw (Exception) { size_t initPos = 0; size_t stopPos = sequence.size(); if (AlphabetTools::isCodonAlphabet(sequence.getAlphabet())) { // Look for AUG(or ATG) codon: if (lookForInitCodon) { for (unsigned int i = 0; i < sequence.size(); i++) { vector pos = codonAlphabet_->getPositions(sequence[i]); if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2) { initPos = includeInitCodon ? i : i + 1; break; } } } // Look for stop codon: for (size_t i = initPos; i < sequence.size(); i++) { if (codonAlphabet_->isStop(sequence[i])) { stopPos = i; break; } } } else if (AlphabetTools::isNucleicAlphabet(sequence.getAlphabet())) { // Look for AUG(or ATG) codon: if (lookForInitCodon) { for (unsigned int i = 0; i < sequence.size() - 2; i++) { if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2) { initPos = includeInitCodon ? 
i : i + 3; break; } } } // Look for stop codon: const NucleicAlphabet* nucAlpha = codonAlphabet_->getNucleicAlphabet(); for (size_t i = initPos; i < sequence.size() - 2; i += 3) { string codon = nucAlpha->intToChar(sequence[i]) + nucAlpha->intToChar(sequence[i + 1]) + nucAlpha->intToChar(sequence[i + 2]); if (codonAlphabet_->isStop(codon)) { stopPos = i; break; } } } else throw AlphabetMismatchException("Sequence must have alphabet of type nucleic or codon in GeneticCode::getCodingSequence.", 0, sequence.getAlphabet()); return SequenceTools::subseq(sequence, initPos, stopPos - 1); } /**********************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h000644 000000 000000 00000005055 12147656566 026334 0ustar00rootroot000000 000000 // // File: EchinodermMitochondrialGeneticCode.h // Created by: Eric Bazin // Created on: 14 11:31:27 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ECHINODERMMITOCHONDRIALGENETICCODE_H_ #define _ECHINODERMMITOCHONDRIALGENETICCODE_H_ #include "GeneticCode.h" #include "../Alphabet/NucleicAlphabet.h" namespace bpp { /** * @brief This class implements the Echinoderm and Flatworm * Mitochondrial genetic code as described on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG9 */ class EchinodermMitochondrialGeneticCode: public GeneticCode { public: /** @brief Build the genetic code on top of the nucleic alphabet alpha. */ EchinodermMitochondrialGeneticCode(const NucleicAlphabet * alpha); virtual ~EchinodermMitochondrialGeneticCode(); public: /** @brief Translate a codon, given by its integer state, into an amino-acid state. */ int translate(int state) const throw (Exception); /** @brief Translate a codon, given as text, into the amino acid as text. */ std::string translate(const std::string & state) const throw (Exception); /** @brief Translate a whole sequence; forwards to GeneticCode::translate(sequence). Presumably redeclared so the base-class overload is not hidden by the two overloads above. */ Sequence * translate(const Sequence & sequence) const throw (Exception) { return GeneticCode::translate(sequence); } }; } //end of namespace bpp. #endif //_ECHINODERMMITOCHONDRIALGENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp000644 000000 000000 00000013455 12147656566 023640 0ustar00rootroot000000 000000 // // File: StandardGeneticCode.cpp // Created by: Julien Dutheil // Created on: Mon Oct 13 15:39:17 2003 // /* Copyright or © or Copr.
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "StandardGeneticCode.h" #include "../Alphabet/StandardCodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" using namespace bpp; #include using namespace std; StandardGeneticCode::StandardGeneticCode(const NucleicAlphabet* alpha) : GeneticCode() { codonAlphabet_ = new StandardCodonAlphabet(alpha); proteicAlphabet_ = new ProteicAlphabet(); } StandardGeneticCode::~StandardGeneticCode() { delete codonAlphabet_; delete proteicAlphabet_; } int StandardGeneticCode::translate(int state) const throw (Exception) { if (state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode(); vector positions = codonAlphabet_->getPositions(state); switch (positions[0]) { //First position: case 0 : //A switch (positions[1]) { //Second position: case 0 : //AA switch (positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine } case 1 : //AC return proteicAlphabet_->charToInt("T"); //Threonine case 2 : //AG switch (positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("R"); //Arginine case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine } case 3 : //AT switch (positions[2]) { //Third position: case 2: return proteicAlphabet_->charToInt("M"); //Methionine case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine } } case 1 : //C switch (positions[1]) { //Second position: case 0 : //CA switch (positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine } case 1 : //CC return proteicAlphabet_->charToInt("P"); //Proline case 2 : //CG return proteicAlphabet_->charToInt("R"); //Arginine case 3 : //CT return proteicAlphabet_->charToInt("L"); //Leucine } case 2 : //G switch (positions[1]) { //Second position: case 0 : //GA switch (positions[2]) 
{ //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid } case 1 : //GC return proteicAlphabet_->charToInt("A"); //Alanine case 2 : //GG return proteicAlphabet_->charToInt("G"); //Glycine case 3 : //GT return proteicAlphabet_->charToInt("V"); //Valine } case 3 : //T(U) switch (positions[1]) { //Second position: case 0 : //TA switch (positions[2]) { //Third position: case 0 : throw StopCodonException("", "TAA"); //Stop codon case 2 : throw StopCodonException("", "TAG"); //Stop codon case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine } case 1 : //TC return proteicAlphabet_->charToInt("S"); //Serine case 2 : //TG switch (positions[2]) { //Third position: case 0 : throw StopCodonException("", "TGA"); //Stop codon case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine } case 3 : //TT switch (positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine } } } throw BadIntException(state, "StandardGeneticCode::translate", codonAlphabet_); } string StandardGeneticCode::translate(const string& state) const throw (Exception) { return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state))); } bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h000644 000000 000000 00000005070 12147656566 026706 0ustar00rootroot000000 000000 // // File: InvertebrateMitochondrialGeneticCode.h // Created by: Eric Bazin // Created on: wen mar 2 15:22:46 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _INVERTEBRATEMITOCHONDRIALGENETICCODE_H_ #define _INVERTEBRATEMITOCHONDRIALGENETICCODE_H_ #include "GeneticCode.h" #include "../Alphabet/NucleicAlphabet.h" namespace bpp { /** * @brief This class implements the Invertebrate * Mitochondrial genetic code as described on the NCBI website: * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG5 */ class InvertebrateMitochondrialGeneticCode: public GeneticCode { public: /** @brief Build the genetic code on top of the nucleic alphabet alpha. */ InvertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha); virtual ~InvertebrateMitochondrialGeneticCode(); public: /** @brief Translate a codon, given by its integer state, into an amino-acid state. */ int translate(int state) const throw (Exception); /** @brief Translate a codon, given as text, into the amino acid as text. */ std::string translate(const std::string & state) const throw (Exception); /** @brief Translate a whole sequence; forwards to GeneticCode::translate(sequence). Presumably redeclared so the base-class overload is not hidden by the two overloads above. */ Sequence * translate(const Sequence & sequence) const throw (Exception) { return GeneticCode::translate(sequence); } }; } //end of namespace bpp. #endif //_INVERTEBRATEMITOCHONDRIALGENETICCODE_H_ bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp000644 000000 000000 00000013722 12147656566 026667 0ustar00rootroot000000 000000 // // File: EchinodermMitochondrialGeneticCode.cpp // Created by: Eric Bazin // Created on: 14 11:31:27 CET 2005 /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "EchinodermMitochondrialGeneticCode.h" #include "../Alphabet/EchinodermMitochondrialCodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" using namespace bpp; #include using namespace std; EchinodermMitochondrialGeneticCode::EchinodermMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode() { codonAlphabet_ = new EchinodermMitochondrialCodonAlphabet(alpha); proteicAlphabet_ = new ProteicAlphabet(); } EchinodermMitochondrialGeneticCode::~EchinodermMitochondrialGeneticCode() { delete codonAlphabet_; delete proteicAlphabet_; } int EchinodermMitochondrialGeneticCode::translate(int state) const throw (Exception) { if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode(); vector positions = codonAlphabet_->getPositions(state); switch(positions[0]) { //First position: case 0 : //A switch(positions[1]) { //Second position: case 0 : //AA switch(positions[2]) { //Third position: case 2 : return proteicAlphabet_->charToInt("K"); //Lysine case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine } case 1 : //AC return proteicAlphabet_->charToInt("T"); //Threonine case 2 : //AG 
switch(positions[2]) { //Third position: case 0 : case 1 : case 2 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine } case 3 : //AT switch(positions[2]) { //Third position: case 2: return proteicAlphabet_->charToInt("M"); //Methionine case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine } } case 1 : //C switch(positions[1]) { //Second position: case 0 : //CA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine } case 1 : //CC return proteicAlphabet_->charToInt("P"); //Proline case 2 : //CG return proteicAlphabet_->charToInt("R"); //Arginine case 3 : //CT return proteicAlphabet_->charToInt("L"); //Leucine } case 2 : //G switch(positions[1]) { //Second position: case 0 : //GA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid } case 1 : //GC return proteicAlphabet_->charToInt("A"); //Alanine case 2 : //GG return proteicAlphabet_->charToInt("G"); //Glycine case 3 : //GT return proteicAlphabet_->charToInt("V"); //Valine } case 3 : //T(U) switch(positions[1]) { //Second position: case 0 : //TA switch(positions[2]) { //Third position: case 0 : throw StopCodonException("", "TAA"); //Stop codon case 2 : throw StopCodonException("", "TAG"); //Stop codon case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine } case 1 : //TC return proteicAlphabet_->charToInt("S"); //Serine case 2 : //TG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine } case 3 : //TT switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine case 1 : case 3 : return proteicAlphabet_->charToInt("F"); 
//Phenylalanine } } } throw BadIntException(state, "EchinodermMitochondrialGeneticCode::translate", codonAlphabet_); } string EchinodermMitochondrialGeneticCode::translate(const string & state) const throw (Exception) { return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state))); } bpp-seq-2.1.0/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp000644 000000 000000 00000013647 12147656566 026723 0ustar00rootroot000000 000000 // // File: VertebrateMitochondrialGeneticCode.cpp // Created by: Eric Bazin // Created on: wen mar 2 16:01:59 CET 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "VertebrateMitochondrialGeneticCode.h" #include "../Alphabet/VertebrateMitochondrialCodonAlphabet.h" #include "../Alphabet/ProteicAlphabet.h" using namespace bpp; #include using namespace std; VertebrateMitochondrialGeneticCode::VertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode() { codonAlphabet_ = new VertebrateMitochondrialCodonAlphabet(alpha); proteicAlphabet_ = new ProteicAlphabet(); } VertebrateMitochondrialGeneticCode::~VertebrateMitochondrialGeneticCode() { delete codonAlphabet_; delete proteicAlphabet_; } int VertebrateMitochondrialGeneticCode::translate(int state) const throw (Exception) { if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode(); vector positions = codonAlphabet_->getPositions(state); switch(positions[0]) { //First position: case 0 : //A switch(positions[1]) { //Second position: case 0 : //AA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine } case 1 : //AC return proteicAlphabet_->charToInt("T"); //Threonine case 2 : //AG switch(positions[2]) { //Third position: case 0 : throw StopCodonException("", "AGA"); //Stop case 2 : throw StopCodonException("", "AGG"); //Stop case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine } case 3 : //AT switch(positions[2]) { //Third position: case 2 : case 0 : return proteicAlphabet_->charToInt("M"); //Methionine case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine } } case 1 : //C switch(positions[1]) { //Second position: case 0 : //CA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine } case 1 : 
//CC return proteicAlphabet_->charToInt("P"); //Proline case 2 : //CG return proteicAlphabet_->charToInt("R"); //Arginine case 3 : //CT return proteicAlphabet_->charToInt("L"); //Leucine } case 2 : //G switch(positions[1]) { //Second position: case 0 : //GA switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid } case 1 : //GC return proteicAlphabet_->charToInt("A"); //Alanine case 2 : //GG return proteicAlphabet_->charToInt("G"); //Glycine case 3 : //GT return proteicAlphabet_->charToInt("V"); //Valine } case 3 : //T(U) switch(positions[1]) { //Second position: case 0 : //TA switch(positions[2]) { //Third position: case 0 : throw StopCodonException("", "TAA"); //Stop codon case 2 : throw StopCodonException("", "TAG"); //Stop codon case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine } case 1 : //TC return proteicAlphabet_->charToInt("S"); //Serine case 2 : //TG switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine } case 3 : //TT switch(positions[2]) { //Third position: case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine } } } throw BadIntException(state, "VertebrateMitochondrialGeneticCode::translate", codonAlphabet_); } string VertebrateMitochondrialGeneticCode::translate(const string & state) const throw (Exception) { return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state))); } bpp-seq-2.1.0/src/Bpp/Seq/DNAToRNA.h000644 000000 000000 00000006462 12147656566 016550 0ustar00rootroot000000 000000 // // File: DNAToRNA.h // Created by: Julien Dutheil // Created on: Sun Oct 12 14:39:29 2003 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _DNATORNA_H_ #define _DNATORNA_H_ #include "Transliterator.h" #include "Alphabet/DNA.h" #include "Alphabet/RNA.h" #include "Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Reverse transliterator whose source alphabet is DNA and whose target alphabet is RNA. * * translate() goes from the source (DNA) to the target (RNA) alphabet and reverse() the other way. * Presumably only T and U are exchanged; the implementation is not visible in this header -- confirm in DNAToRNA.cpp.
 * * @see NucleicAcidsReplication */ class DNAToRNA: public AbstractReverseTransliterator { private: /* Point to the static alphabets of AlphabetTools; the destructor below does not delete them. */ const Alphabet* dna_, * rna_; public: DNAToRNA(): AbstractReverseTransliterator(), dna_(&AlphabetTools::DNA_ALPHABET), rna_(&AlphabetTools::RNA_ALPHABET) {} DNAToRNA(const DNAToRNA& d2r): AbstractReverseTransliterator(d2r), dna_(d2r.dna_), rna_(d2r.rna_) {} DNAToRNA& operator=(const DNAToRNA& d2r) { AbstractReverseTransliterator::operator=(d2r); dna_ = d2r.dna_; rna_ = d2r.rna_; return *this; } virtual ~DNAToRNA() {} public: /* The source alphabet is the DNA alphabet, the target alphabet is the RNA alphabet. */ virtual const Alphabet* getSourceAlphabet() const { return dna_; } virtual const Alphabet* getTargetAlphabet() const { return rna_; } int translate(int state) const throw (BadIntException); std::string translate(const std::string& state) const throw (BadCharException); /* Whole-sequence conversions are delegated to AbstractReverseTransliterator. */ Sequence* translate(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { return AbstractReverseTransliterator::translate(sequence); } int reverse(int state) const throw (BadIntException); std::string reverse(const std::string& state) const throw (BadCharException); Sequence* reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception) { return AbstractReverseTransliterator::reverse(sequence); } }; } //end of namespace bpp. #endif //_DNATORNA_H_ bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithQualityTools.h000644 000000 000000 00000014251 12147656566 022273 0ustar00rootroot000000 000000 // // File: SequenceWithQualityTools.h // Authors: Vincent Cahais // Sylvain Gaillard // Created on: 16 Apr 2010 // /* Copyright or © or Copr. Bio++ Development Team, (Apr 16, 2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef SEQUENCEWITHQUALITYTOOLS_H_ #define SEQUENCEWITHQUALITYTOOLS_H_ #include "SequenceTools.h" #include "SequenceWithQuality.h" namespace bpp { /** * @brief The SequenceWithQualityTools static class * * Implements methods to manipulate SequenceWithQuality objects. * * @todo * - do complement, transcript and reverseTranscript have a meaning on * SequenceWithQuality, as the quality is not transferable? * * @author Cahais Vincent */ class SequenceWithQualityTools { private: static DNA _DNA; static RNA _RNA; static NucleicAcidsReplication _DNARep; static NucleicAcidsReplication _RNARep; static NucleicAcidsReplication _transc; public: /** * @brief Get a sub-sequence. * * @param sequence The sequence to truncate.
 * @param begin The first position of the subsequence. * @param end The last position of the subsequence. * @return A new SequenceWithQuality object with the given subsequence. * @throw IndexOutOfBoundsException, Exception In case of bad indices. */ static SequenceWithQuality* subseq( const SequenceWithQuality& sequence, unsigned int begin, unsigned int end ) throw (IndexOutOfBoundsException, Exception) ; /** * @brief Concatenate two sequences. * * Sequences must have the same name and alphabets. * Only the first sequence's comments are kept. * * @param seqwq1 The first SequenceWithQuality. * @param seqwq2 The second SequenceWithQuality. * @return A new SequenceWithQuality object with the concatenation of the * two sequences. * @throw AlphabetMismatchException If the two alphabets do not match. * @throw Exception If the sequence names do not match. */ static SequenceWithQuality* concatenate( const SequenceWithQuality& seqwq1, const SequenceWithQuality& seqwq2 ) throw (AlphabetMismatchException, Exception) ; /** * @brief Get the complementary sequence of a nucleotide sequence. * * @see DNAReplication * @return A new SequenceWithQuality object with the * complementary sequence. * @param sequence The sequence to complement. * @throw AlphabetException If the sequence is not a nucleotide sequence. */ static SequenceWithQuality* complement( const SequenceWithQuality& sequence ) throw (AlphabetException); /** * @brief Get the transcription sequence of a DNA sequence. * * @see DNAReplication * @return A new SequenceWithQuality object with the * transcription sequence. * @param sequence The sequence to transcribe. * @throw AlphabetException If the sequence is not a DNA sequence. */ static SequenceWithQuality* transcript( const SequenceWithQuality& sequence ) throw (AlphabetException); /** * @brief Get the reverse-transcription sequence of an RNA sequence.
 * * @see DNAReplication * @return A new SequenceWithQuality object with the reverse- * transcription sequence. * @param sequence The SequenceWithQuality to reverse-transcribe. * @throw AlphabetException If the sequence is not an RNA sequence. */ static SequenceWithQuality* reverseTranscript( const SequenceWithQuality& sequence ) throw (AlphabetException); /** * @brief Invert a sequence from 5'->3' to 3'->5' and vice-versa. * * ABCDEF becomes FEDCBA, and the sense attribute is changed (may be * inhibited). * * @return A new SequenceWithQuality object containing the inverted * sequence. * @param sequence The SequenceWithQuality to invert. */ static SequenceWithQuality* invert( const SequenceWithQuality& sequence ); /** * @brief Remove gaps from a SequenceWithQuality. * * @param seq The sequence to analyse. * @return A new SequenceWithQuality object without gaps. */ static SequenceWithQuality* removeGaps(const SequenceWithQuality& seq); /** * @brief Trim the left part of the sequence according to quality * * @param seq The sequence to analyse. * @return The modified sequence. */ static SequenceWithQuality& trimLeft(SequenceWithQuality& seq); }; } #endif /* SEQUENCEWITHQUALITYTOOLS_H_ */ bpp-seq-2.1.0/src/Bpp/Seq/Io/IoSequence.h000644 000000 000000 00000004154 12147656566 017705 0ustar00rootroot000000 000000 // // File IoSequence.h // Created by: Guillaume Deuchst // Julien Dutheil // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _IOSEQUENCE_H_ #define _IOSEQUENCE_H_ #include #include // From STL: #include namespace bpp { /** * @brief The IOSequence interface. * * Interface for sequence input/output; getDataType() always returns "Sequence container". */ class IOSequence: public virtual IOFormat { public: IOSequence() {} virtual ~IOSequence() {} public: const std::string getDataType() const { return "Sequence container"; } }; } //end of namespace bpp. #endif // _IOSEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp000644 000000 000000 00000005003 12147656566 023071 0ustar00rootroot000000 000000 // // File: BppOSequenceWriterFormat.cpp // Created by: Julien Dutheil // Created on: Friday September 15th, 21:20 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "BppOSequenceWriterFormat.h" #include "Fasta.h" #include "Mase.h" #include #include #include using namespace bpp; using namespace std; OSequence* BppOSequenceWriterFormat::read(const std::string& description) throw (Exception) { unparsedArguments_.clear(); string format = ""; KeyvalTools::parseProcedure(description, format, unparsedArguments_); unsigned int ncol = ApplicationTools::getParameter("length", unparsedArguments_, 100, "", true, false); auto_ptr oSeq; if (format == "Fasta") { oSeq.reset(new Fasta(ncol)); } else if (format == "Mase") { oSeq.reset(new Mase(ncol)); } else { throw Exception("Sequence format '" + format + "' unknown."); } return oSeq.release(); } bpp-seq-2.1.0/src/Bpp/Seq/Io/PhredPoly.cpp000644 000000 000000 00000006561 12147656566 020112 0ustar00rootroot000000 000000 // // File: PhredPoly.cpp // Created by: Sylvain Gaillard // Created on: Fri Oct 31 2008 // /* Copyright or © or Copr. CNRS, (October 31, 2008) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "PhredPoly.h" #include #include #include using namespace bpp; using namespace std; /******************************************************************************/ PhredPoly::PhredPoly(double ratio) : ratio_(ratio) {} /******************************************************************************/ bool PhredPoly::nextSequence(istream& input, Sequence& seq) const throw (Exception) { if (!input) { throw IOException ("PhredPoly::read: fail to open stream"); } string temp, name, sequence = ""; // Initialization bool flag = false; // Read first line if (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string StringTokenizer st(temp, " "); name = st.getToken(0); } const Alphabet* alpha = seq.getAlphabet(); // Main loop : for all other lines while (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string StringTokenizer st(temp, " "); if (st.numberOfRemainingTokens() == 12) { double a = TextTools::toDouble(st.getToken(3)); double b = TextTools::toDouble(st.getToken(7)); if (a < b) { NumTools::swap(a, b); } vector v; v.push_back(st.getToken(0)); // Get the called base if (b / a > this->ratio_) { v.push_back(st.getToken(4)); 
// Get the uncalled base if relative picks areas are similar } sequence += alpha->getGeneric(v); } } if(name == "") { throw Exception("PhredPoly::read: sequence without name!"); } else { seq.setName(name); seq.setContent(sequence); flag = true; } return flag; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Fasta.cpp000644 000000 000000 00000020453 12147656566 017236 0ustar00rootroot000000 000000 // // File: Fasta.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Fasta.h" #include #include "../StringSequenceTools.h" #include #include #include using namespace bpp; using namespace std; /******************************************************************************/ bool Fasta::nextSequence(istream& input, Sequence& seq) const throw (Exception) { if (!input) throw IOException("Fasta::nextSequence: can't read from istream input"); string seqname = ""; string content = ""; Comments seqcmts; short seqcpt = 0; string linebuffer = ""; char c; while (!input.eof()) { c = static_cast(input.peek()); if (input.eof()) c = '\n'; // Sequence begining detection if (c == '>') { // Stop if find a new sequence if (seqcpt++) break; } getline(input, linebuffer); if (c == '>') { // Get the sequence name line seqname = string(linebuffer.begin() + 1, linebuffer.end()); } if (c != '>' && !TextTools::isWhiteSpaceCharacter(c)) { // Sequence content content += TextTools::toUpper(TextTools::removeWhiteSpaces(linebuffer)); } } bool res = (!input.eof()); // Sequence name and comments isolation if (strictNames_ || extended_) { size_t pos = seqname.find_first_of(" \t\n"); string seqcmt; if (pos != string::npos) { seqcmt = seqname.substr(pos + 1); seqname = seqname.substr(0, pos); } if (extended_) { StringTokenizer st(seqcmt, " \\", true, false); while (st.hasMoreToken()) { seqcmts.push_back(st.nextToken()); } } else { seqcmts.push_back(seqcmt); } seq.setComments(seqcmts); } seq.setName(seqname); seq.setContent(content); return res; } /******************************************************************************/ void Fasta::writeSequence(ostream& 
output, const Sequence& seq) const throw (Exception) { if (!output) throw IOException("Fasta::writeSequence: can't write to ostream output"); // Sequence name output << ">" << seq.getName(); // Sequence comments if (extended_) { for (unsigned int i = 0 ; i < seq.getComments().size() ; i++) { output << " \\" << seq.getComments()[i]; } } output << endl; // Sequence content string buffer; // use a buffer to format sequence with states > 1 char for (unsigned int i = 0 ; i < seq.size() ; i++) { buffer += seq.getChar(i); if (buffer.size() >= charsByLine_ || i + 1 == seq.size()) { output << string(buffer.begin(), buffer.begin() + charsByLine_ < buffer.end() ? buffer.begin() + charsByLine_ : buffer.end()) << endl; buffer.erase(0, charsByLine_); } } } /******************************************************************************/ void Fasta::appendSequencesFromStream(istream& input, SequenceContainer& vsc) const throw (Exception) { if (!input) throw IOException("Fasta::appendFromStream: can't read from istream input"); char c = '\n'; char last_c; bool header = false; bool hasSeq = true; string line = ""; Comments cmts; while (!input.eof() && hasSeq) { last_c = c; input.get(c); // Header detection if (extended_ && c == '#') { header = true; continue; } // Header end detection if (c == '\n') { if (extended_ && header) { if (line[0] == '\\') { line.erase(line.begin()); cmts.push_back(line); } line = ""; header = false; } continue; } // Header capture if (header) { line.append(1, c); } // Sequence detection if (c == '>' && last_c == '\n') { input.putback(c); c = last_c; BasicSequence tmpseq("", "", vsc.getAlphabet()); hasSeq = nextSequence(input, tmpseq); vsc.addSequence(tmpseq, checkNames_); } } if (extended_ && cmts.size()) { vsc.setGeneralComments(cmts); } } /******************************************************************************/ void Fasta::writeSequences(ostream& output, const SequenceContainer& sc) const throw (Exception) { if (!output) throw 
IOException("Fasta::write: can't write to ostream output"); if (extended_) { // Loop for all general comments for (unsigned int i = 0 ; i < sc.getGeneralComments().size() ; i++) { output << "#\\" << sc.getGeneralComments()[i] << endl; } output << endl; } // Main loop : for all sequences in vector container vector names = sc.getSequencesNames(); for (unsigned int i = 0; i < names.size(); i ++) { writeSequence(output, sc.getSequence(names[i])); } } /******************************************************************************/ // FileIndex class void Fasta::FileIndex::build(const std::string& path) throw (Exception) { // open the file std::ifstream f_in(path.c_str()); // get the size of the file f_in.seekg(0, std::ios::end); fileSize_ = f_in.tellg(); // feed the map f_in.seekg(0, std::ios::beg); streampos pos = f_in.tellg(); char ch; std::string seq_id = ""; while (f_in.get(ch)) { if (ch == '>') { pos = static_cast(f_in.tellg()) - 1; std::getline(f_in, seq_id); index_[seq_id] = pos; } } f_in.close(); } streampos Fasta::FileIndex::getSequencePosition(const std::string& id) const throw (Exception) { std::map::const_iterator it = index_.find(id); if (it != index_.end()) { return it->second; } throw Exception("Sequence not found: " + id); } void Fasta::FileIndex::read(const std::string& path) throw (Exception) { std::ifstream f_in(path.c_str()); std::string line_buffer = ""; while (!f_in.eof()) { std::getline(f_in, line_buffer); if (bpp::TextTools::isEmpty(bpp::TextTools::removeSurroundingWhiteSpaces(line_buffer))) { continue; } bpp::StringTokenizer tk(line_buffer, "\t"); index_[tk.getToken(0)] = bpp::TextTools::toInt(tk.getToken(1)); } f_in.close(); } void Fasta::FileIndex::write(const std::string& path) throw (Exception) { std::ofstream f_out(path.c_str()); for (std::map::const_iterator it = index_.begin() ; it != index_.end() ; ++it) { f_out << it->first << "\t" << bpp::TextTools::toString(it->second) << std::endl; } f_out.close(); } void 
Fasta::FileIndex::getSequence(const std::string& seqid, Sequence& seq, const std::string& path) const { Fasta fs(60); streampos seq_pos = this->getSequencePosition(seqid); std::ifstream fasta(path.c_str()); fasta.seekg(seq_pos); fs.nextSequence(fasta, seq); fasta.close(); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/MaseTools.cpp000644 000000 000000 00000025075 12147656566 020113 0ustar00rootroot000000 000000 // // File: MaseTools.cpp // Created by: Julien Dutheil // Created on: Tue Apr 1 09:16:59 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "MaseTools.h" #include "../Container/VectorSequenceContainer.h" #include "../Container/AlignedSequenceContainer.h" #include "../Container/SequenceContainerTools.h" #include #include #include using namespace std; using namespace bpp; SiteSelection MaseTools::getSiteSet(const Comments& maseFileHeader, const string& setName) throw (IOException) { SiteSelection selection; for (size_t i = 0; i < maseFileHeader.size(); i++) { string current = maseFileHeader[i]; string::size_type index = current.find("# of"); if (index < current.npos) { StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t=;"); st.nextToken(); // skip next word: may be 'regions' or 'segments' or else ;-) size_t numberOfSegments = TextTools::to(st.nextToken()); string name = st.unparseRemainingTokens(); if (name == setName) { // cout << numberOfSegments << " segments found." << endl; // Then look for the set definition: i++; // next line. size_t counter = 0; while (i < maseFileHeader.size()) { current = maseFileHeader[i++]; StringTokenizer st2(current); // st.nextToken(); //Skip ';;' while (st2.hasMoreToken()) { StringTokenizer st3(st2.nextToken(), ","); size_t begin = TextTools::to(st3.nextToken()); size_t end = TextTools::to(st3.nextToken()); // WARNING!!! In the mase+ format, sites are numbered from 1 to nbSites, // Whereas in SiteContainer the index begins at 0. for (size_t j = begin; j <= end; j++) { selection.push_back(j - 1); // bounds included. 
} counter++; if (counter == numberOfSegments) return selection; } } } } } if (selection.size() == 0) { throw IOException("Site set " + setName + " has not been found in the sequence file."); } return selection; } /******************************************************************************/ SequenceSelection MaseTools::getSequenceSet(const Comments& maseFileHeader, const string& setName) throw (IOException) { SequenceSelection selection; for (size_t i = 0; i < maseFileHeader.size(); i++) { string current = maseFileHeader[i]; string::size_type index = current.find("@ of"); if (index < current.npos) { StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t=;"); st.nextToken(); // skip next word: may be 'sequences' or else ;-) size_t numberOfSequences = TextTools::to(st.nextToken()); string name = st.unparseRemainingTokens(); size_t counter = 0; if (name == setName) { // cout << numberOfSequences << " segments found." << endl; // Then look for the set definition: i++; // next line. while (i < maseFileHeader.size()) { current = maseFileHeader[i++]; StringTokenizer st2(current, ","); while (st2.hasMoreToken()) { int seqIndex = TextTools::toInt(st2.nextToken()); // WARNING!!! In the mase+ format, sequences are numbered from 1 to nbSequences, // Whereas in SequenceContainer the index begins at 0. selection.push_back(seqIndex - 1); // bounds included. 
counter++; if (counter == numberOfSequences) return selection; } } } } } if (selection.size() == 0) { throw IOException("Sequence set " + setName + " has not been found in the sequence file."); } return selection; } /******************************************************************************/ SiteContainer* MaseTools::getSelectedSites( const SiteContainer& sequences, const string& setName) throw (IOException) { SiteSelection ss = getSiteSet(sequences.getGeneralComments(), setName); // We need to convert positions in case of word alphabet: size_t wsize = sequences.getAlphabet()->getStateCodingSize(); if (wsize > 1) { if (ss.size() % wsize != 0) throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container."); SiteSelection ss2; for (size_t i = 0; i < ss.size(); i += wsize) { if (ss[i] % wsize != 0) throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container."); for (size_t j = 1; j < wsize; ++j) { if (ss[i + j] != (ss[i + j - 1] + 1)) throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container."); } ss2.push_back(ss[i] / wsize); } return SiteContainerTools::getSelectedSites(sequences, ss2); } else { return SiteContainerTools::getSelectedSites(sequences, ss); } } /******************************************************************************/ SequenceContainer* MaseTools::getSelectedSequences( const OrderedSequenceContainer& sequences, const std::string& setName) throw (IOException) { SequenceSelection ss = getSequenceSet(sequences.getGeneralComments(), setName); VectorSequenceContainer* cont = new VectorSequenceContainer(sequences.getAlphabet()); SequenceContainerTools::getSelectedSequences(sequences, ss, *cont); return cont; } /******************************************************************************/ map MaseTools::getAvailableSiteSelections(const Comments& maseHeader) { 
map selections; for (size_t i = 0; i < maseHeader.size(); i++) { string current = maseHeader[i]; string::size_type index = current.find("# of"); if (index < current.npos) { StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t\n\f\r=;"); st.nextToken(); // skip next word: may be 'sequences' or else ;-) size_t numberOfSegments = TextTools::toInt(st.nextToken()); string name = st.nextToken(); while (st.hasMoreToken()) { name += " " + st.nextToken(); } size_t counter = 0; size_t nbSites = 0; while (i < maseHeader.size()) { i++; current = maseHeader[i]; StringTokenizer st2(current); // st.nextToken(); //Skip ';;' while (st2.hasMoreToken()) { StringTokenizer st3(st2.nextToken(), ","); size_t begin = TextTools::toInt(st3.nextToken()); size_t end = TextTools::toInt(st3.nextToken()); counter++; nbSites += end - begin + 1; } if (counter == numberOfSegments) { selections[name] = nbSites; break; } } } } return selections; } /******************************************************************************/ map MaseTools::getAvailableSequenceSelections(const Comments& maseHeader) { map selections; for (size_t i = 0; i < maseHeader.size(); i++) { string current = maseHeader[i]; string::size_type index = current.find("@ of"); if (index < current.npos) { StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t\n\f\r=;"); st.nextToken(); // skip next word: may be 'sequences' or else ;-) size_t numberOfSequences = TextTools::fromString(st.nextToken()); string name = st.nextToken(); while (st.hasMoreToken()) { name += st.nextToken(); } selections[name] = numberOfSequences; } } return selections; } /******************************************************************************/ size_t MaseTools::getPhase(const Comments& maseFileHeader, const string& setName) throw (Exception) { size_t phase = 0; string::size_type index = 0; for (size_t i = 0; i < maseFileHeader.size(); i++) { string current = maseFileHeader[i]; index = current.find("# of"); if 
(index < current.npos) { StringTokenizer st(string(current.begin() + index + 12, current.end()), " \t\n\f\r=;"); // size_t numberOfSegments = TextTools::toInt(st.nextToken()); // cout << "Number of regions: " << st.nextToken() << endl; string name; while (st.hasMoreToken()) { name = st.nextToken(); // cout << "Name of regions: " << name << endl; } if (name == setName) { return phase; } } index = current.find("/codon_start"); if (index < current.npos) { StringTokenizer st(string(current.begin() + index + 12, current.end()), " \t\n\f\r=;"); phase = TextTools::toInt(st.nextToken()); } } throw Exception("PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing."); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Stockholm.h000644 000000 000000 00000007314 12147656566 017611 0ustar00rootroot000000 000000 // // File: Stockholm.h // Authors: Julien Dutheil // Created: Thu Apr 15 2010 // /* Copyright or © or Copr. Bio++ Development Team (2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _STOCKHOLM_H_ #define _STOCKHOLM_H_ #include "AbstractOAlignment.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/AlignedSequenceContainer.h" namespace bpp { /** * @brief The Stockholm alignment file format. * * Write to Stockholm files. * Only sequence data is read/written, annotation and secondary structures are ignored. */ class Stockholm: public AbstractOAlignment { private: bool checkNames_; public: /** * @brief Build a new Stockholm object. * * @param checkSequenceNames Tell if the names in the file should be checked for unicity (slower, in o(n*n) where n is the number of sequences). */ Stockholm(bool checkSequenceNames = true) : checkNames_(checkSequenceNames) {} // Class destructor virtual ~Stockholm() {} public: /** * @name The OAlignment interface. * * @{ */ void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception); void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite = true) const throw (Exception) { AbstractOAlignment::writeAlignment(path, sc, overwrite); } /** @} */ /** * @name The IOSequence interface. 
* * @{ */ const std::string getFormatName() const { return "Stockholm file"; }; const std::string getFormatDescription() const { return "See http://en.wikipedia.org/wiki/Stockholm_format"; } /** @} */ /** * @warning This is not used for now, will be when reading is implemented. * @return true if the names are to be checked when reading sequences from files. */ bool checkNames() const { return checkNames_; } /** * @brief Tell whether the sequence names should be checked when reading from files. * * @warning This is not used for now, will be when reading is implemented. * @param yn whether the sequence names should be checked when reading from files. */ void checkNames(bool yn) { checkNames_ = yn; } }; } //end of namespace bpp. #endif // _FASTA_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp000644 000000 000000 00000011437 12147656566 022725 0ustar00rootroot000000 000000 // // File: BppOAlphabetIndex1Format.cpp // Created by: Julien Dutheil // Created on: Thursday Februar 07th, 16:30 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BppOAlphabetIndex1Format.h" #include "../Alphabet/AlphabetTools.h" #include "../AlphabetIndex/GranthamAAPolarityIndex.h" #include "../AlphabetIndex/GranthamAAVolumeIndex.h" #include "../AlphabetIndex/KleinAANetChargeIndex.h" #include "../AlphabetIndex/AAChouFasmanAHelixIndex.h" #include "../AlphabetIndex/AAChouFasmanBSheetIndex.h" #include "../AlphabetIndex/AAChouFasmanTurnIndex.h" #include "../AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h" #include "../AlphabetIndex/AASurfaceIndex.h" #include "../AlphabetIndex/AAMassIndex.h" #include "../AlphabetIndex/AAVolumeIndex.h" #include "../AlphabetIndex/AAChargeIndex.h" #include "../AlphabetIndex/AASEAInf10Index.h" #include "../AlphabetIndex/AASEA1030Index.h" #include "../AlphabetIndex/AASEASup30Index.h" #include "../AlphabetIndex/AAIndex1Entry.h" #include #include #include #include using namespace bpp; using namespace std; AlphabetIndex1* BppOAlphabetIndex1Format::read(const std::string& description) throw (Exception) { if (description != "None") { string name; map args; KeyvalTools::parseProcedure(description, name, args); if (verbose_) ApplicationTools::displayResult(message_, description); //Currently, only protein indices 
are supported: if (!AlphabetTools::isProteicAlphabet(alphabet_)) throw Exception("BppOAlphabetIndex1Format::read. This index is only supported with a protein alphabet."); if (name == "GranthamPolarity") { return new GranthamAAPolarityIndex(); } else if (name == "GranthamVolume") { return new GranthamAAVolumeIndex(); } else if (name == "KleinCharge") { return new KleinAANetChargeIndex(); } else if (name == "ChouFasmanAHelix") { return new AAChouFasmanAHelixIndex(); } else if (name == "ChouFasmanBSheet") { return new AAChouFasmanBSheetIndex(); } else if (name == "ChouFasmanTurn") { return new AAChouFasmanTurnIndex(); } else if (name == "ChenGuHuangHydrophobicity") { return new AAChenGuHuangHydrophobicityIndex(); } else if (name == "Surface") { return new AASurfaceIndex(); } else if (name == "Mass") { return new AAMassIndex(); } else if (name == "Volume") { return new AAVolumeIndex(); } else if (name == "Charge") { return new AAChargeIndex(); } else if (name == "SEAMedium") { return new AASEA1030Index(); } else if (name == "SEAHigh") { return new AASEASup30Index(); } else if (name == "SEALow") { return new AASEAInf10Index(); } else if (name == "User") { string aax1FilePath = ApplicationTools::getAFilePath("file", args, true, true, "", false); ifstream aax1File(aax1FilePath.c_str(), ios::in); AAIndex1Entry* I = new AAIndex1Entry (aax1File); aax1File.close(); return I; } else { throw Exception("Invalid index1 '" + name + "'."); } } else { return 0; } } bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.h000644 000000 000000 00000007600 12147656566 022367 0ustar00rootroot000000 000000 // // File: BppOAlphabetIndex1Format.h // Created by: Julien Dutheil // Created on: Thursday Februar 07th, 16:30 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPPOALPHABETINDEX1FORMAT_H_ #define _BPPOALPHABETINDEX1FORMAT_H_ #include #include "../AlphabetIndex/AlphabetIndex1.h" // From the STL: #include namespace bpp { /** * @brief AlphabetIndex1 I/O in BppO format. * * Enables the instanciation of AlphabetIndex1 objects according to * the BppO syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). * */ class BppOAlphabetIndex1Format: public virtual IOFormat { private: const Alphabet* alphabet_; std::string message_; bool verbose_; public: /** * @param alphabet The alphabet for which indices should be built. 
* The alphabet will be used to check that the instanciated index is compatible. * @param message Some text describing what the index is intended for. * @param verbose Tell if some messages should be printed while parsing. */ BppOAlphabetIndex1Format(const Alphabet* alphabet, const std::string& message, bool verbose = true): alphabet_(alphabet), message_(message), verbose_(verbose) {} BppOAlphabetIndex1Format(const BppOAlphabetIndex1Format& format): alphabet_(format.alphabet_), message_(format.message_), verbose_(format.verbose_) {} BppOAlphabetIndex1Format& operator=(const BppOAlphabetIndex1Format& format) { alphabet_ = format.alphabet_; message_ = format.message_; verbose_ = format.verbose_; return *this; } virtual ~BppOAlphabetIndex1Format() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "AlphabetIndex1"; } /** * @brief Read a AlphabetIndex1 object from a string. * * @param description A string describing the index in the keyval syntax. * @return A new AlphabetIndex1 object according to options specified. * @throw Exception if an error occured. */ AlphabetIndex1* read(const std::string& description) throw (Exception); }; } //end of namespace bpp. #endif //_BPPOALPHABETINDEX1FORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/AbstractOSequence.h000644 000000 000000 00000006611 12147656566 021220 0ustar00rootroot000000 000000 // // File: AbstractOSequence.h // Created by: Julien Dutheil // Created on: ? // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTOSEQUENCE_H_ #define _ABSTRACTOSEQUENCE_H_ #include "OSequence.h" #include "../Alphabet/Alphabet.h" #include "../Container/VectorSequenceContainer.h" // From the STL: #include #include namespace bpp { /** * @brief Partial implementation of the OSequence and OAlignment interfaces. 
*/ class AbstractOSequence: public virtual OSequence, public virtual OAlignment { public: AbstractOSequence() {} virtual ~AbstractOSequence() {} public: /** * @name OSequence methods: * * @{ */ void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception) = 0; void writeSequences(const std::string& path, const SequenceContainer& sc, bool overwrite=true) const throw (Exception) { // Open file in specified mode std::ofstream output(path.c_str(), overwrite ? (std::ios::out) : (std::ios::out | std::ios::app)); writeSequences(output, sc); output.close(); } /** @} */ /** * @name OAlignment methods: * * As a SiteContainer is a specialization of SequenceContainer, it is assumed that a OSequence * object can write aligned sequence just like a OAlignment object. * Therefore it implements the OAlignment interface by down-casting the SiteContainer * to a SequenceContainer. * @{ */ void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) { writeSequences(output, dynamic_cast(sc)); } void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite=true) const throw (Exception) { writeSequences(path, dynamic_cast(sc), overwrite); } /** @} */ }; } //end of namespace bpp. #endif //_ABSTRACTOSEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/Dcse.cpp000644 000000 000000 00000007373 12147656566 017064 0ustar00rootroot000000 000000 // // File: DCSE.cpp // Created by: Julien Dutheil // Created on: Wed Mar 3 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Dcse.h" #include "AbstractIAlignment.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSequenceContainer.h" #include #include #include using namespace bpp; using namespace std; void DCSE::appendAlignmentFromStream(istream& input, SiteContainer& sc) const throw (Exception) { // Checking the existence of specified file if (!input) { throw IOException ("DCSE::read : fail to open file"); } // Initialization const Alphabet * alpha = sc.getAlphabet(); string line, name, sequence = ""; line = FileTools::getNextLine(input); // Copy current line in temporary string //StringTokenizer st(line); //st.nextToken(); //First line ignored for now! //int n1 = TextTools::toInt(st.nextToken()); //int n2 = TextTools::toInt(st.nextToken()); //int nbSites = n2 - n1 //cout << nbSpecies << " species and " << nbSites << " sites." 
<< endl; // Main loop : for all file lines while (!input.eof()) { line = FileTools::getNextLine(input); // Copy current line in temporary string if(line == "") break; string::size_type endOfSeq = line.find(" "); if(endOfSeq == line.npos) break; sequence = string(line.begin(), line.begin() + endOfSeq); sequence = TextTools::removeWhiteSpaces(sequence); sequence = TextTools::removeChar(sequence, '{'); sequence = TextTools::removeChar(sequence, '}'); sequence = TextTools::removeChar(sequence, '['); sequence = TextTools::removeChar(sequence, ']'); sequence = TextTools::removeChar(sequence, '('); sequence = TextTools::removeChar(sequence, ')'); sequence = TextTools::removeChar(sequence, '^'); name = string(line.begin() + endOfSeq + 1, line.end()), name = TextTools::removeFirstWhiteSpaces(name); if(name.find("Helix numbering") == name.npos && name.find("mask") == name.npos) sc.addSequence(BasicSequence(name, sequence, alpha), true); } } const string DCSE::getFormatName() const { return "DCSE"; } const string DCSE::getFormatDescription() const { return "RNA structure format"; } bpp-seq-2.1.0/src/Bpp/Seq/Io/AbstractIAlignment.h000644 000000 000000 00000013072 12147656566 021357 0ustar00rootroot000000 000000 // // File: AbstractIAlignment.h // Created by: Julien Dutheil // Created on: mon 27 jun 16:30 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTIALIGNMENT_H_ #define _ABSTRACTIALIGNMENT_H_ #include "../Container/AlignedSequenceContainer.h" #include "../Alphabet/Alphabet.h" #include "ISequence.h" // From the STL: #include #include #include namespace bpp { /** * @brief Partial implementation of the IAlignment interface, dedicated to alignment readers. */ class AbstractIAlignment: public virtual IAlignment { public: AbstractIAlignment() {} virtual ~AbstractIAlignment() {} public: /** * @name IAlignment methods: * * @{ */ /** * @brief Add sequences to a container from a stream. * * @param input The input stream to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void readAlignment(std::istream& input, SiteContainer& sc) const throw (Exception) { appendAlignmentFromStream(input, sc); } /** * @brief Add sequences to a container from a file. 
* * @param path The path to the file to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void readAlignment(const std::string& path, SiteContainer& sc) const throw (Exception) { appendAlignmentFromFile(path, sc); } virtual #if defined(NO_VIRTUAL_COV) SiteContainer* #else AlignedSequenceContainer* #endif readAlignment(const std::string& path , const Alphabet* alpha) const throw (Exception) { return readAlignmentFromFile(path, alpha); } virtual #if defined(NO_VIRTUAL_COV) SiteContainer* #else AlignedSequenceContainer* #endif readAlignment(std::istream& input, const Alphabet* alpha) const throw (Exception) { return readAlignmentFromStream(input, alpha); } /** @} */ protected: /** * @brief Append sequences to a container from a stream. * * This is the unique method to implement! * * @param input The input stream to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception) = 0; /** * @brief Append sequences to a container from a file. * * @param path The path to the file to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void appendAlignmentFromFile(const std::string& path, SiteContainer& sc) const throw (Exception) { std::ifstream input(path.c_str(), std::ios::in); appendAlignmentFromStream(input, sc); input.close(); } /** * @brief Read sequences from a stream. * * @param input The input stream to read. * @param alpha The alphabet to use. * @return A sequence container. * @throw Exception If the file is not in the specified format. 
*/ virtual AlignedSequenceContainer* readAlignmentFromStream(std::istream& input, const Alphabet* alpha) const throw (Exception) { AlignedSequenceContainer* asc = new AlignedSequenceContainer(alpha); appendAlignmentFromStream(input, *asc); return asc; } /** * @brief Read sequences from a file. * * @param path The path to the file to read. * @param alpha The alphabet to use. * @return A sequence container. * @throw Exception If the file is not in the specified format. */ virtual AlignedSequenceContainer* readAlignmentFromFile(const std::string& path, const Alphabet* alpha) const throw (Exception) { AlignedSequenceContainer* asc = new AlignedSequenceContainer(alpha); appendAlignmentFromFile(path, *asc); return asc; } }; } //end of namespace bpp. #endif // _ABSTRACTIALIGNMENT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/IoSequenceFactory.cpp000644 000000 000000 00000011747 12147656566 021576 0ustar00rootroot000000 000000 // // File IOSequenceFactory.cpp // Created by: Julien Dutheil // Created on: Tue 18/04/06 10:24 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "IoSequenceFactory.h" #include "Fasta.h" #include "Mase.h" #include "Clustal.h" #include "Dcse.h" #include "Phylip.h" #include "GenBank.h" #include "NexusIoSequence.h" using namespace bpp; using namespace std; const string IoSequenceFactory::FASTA_FORMAT = "Fasta"; const string IoSequenceFactory::MASE_FORMAT = "Mase"; const string IoSequenceFactory::CLUSTAL_FORMAT = "Clustal"; const string IoSequenceFactory::DCSE_FORMAT = "DCSE"; const string IoSequenceFactory::PHYLIP_FORMAT_INTERLEAVED = "Phylip I"; const string IoSequenceFactory::PHYLIP_FORMAT_SEQUENTIAL = "Phylip S"; const string IoSequenceFactory::PAML_FORMAT_INTERLEAVED = "PAML I"; const string IoSequenceFactory::PAML_FORMAT_SEQUENTIAL = "PAML S"; const string IoSequenceFactory::GENBANK_FORMAT = "GenBank"; const string IoSequenceFactory::NEXUS_FORMAT = "Nexus"; ISequence* IoSequenceFactory::createReader(const string& format) throw (Exception) { if(format == FASTA_FORMAT) return new Fasta(); else if(format == MASE_FORMAT) return new Mase(); else if(format == CLUSTAL_FORMAT) return new Clustal(); else if(format == DCSE_FORMAT) return new DCSE(); else if(format == PHYLIP_FORMAT_INTERLEAVED) return new Phylip(false, 
false); else if(format == PHYLIP_FORMAT_SEQUENTIAL) return new Phylip(false, true); else if(format == PAML_FORMAT_INTERLEAVED) return new Phylip(true, false); else if(format == PAML_FORMAT_SEQUENTIAL) return new Phylip(true, true); else if(format == GENBANK_FORMAT) return new GenBank(); else if(format == NEXUS_FORMAT) return new NexusIOSequence(); else throw Exception("Format " + format + " is not supported for sequences input."); } IAlignment* IoSequenceFactory::createAlignmentReader(const string& format) throw (Exception) { if(format == FASTA_FORMAT) return new Fasta(); else if(format == MASE_FORMAT) return new Mase(); else if(format == CLUSTAL_FORMAT) return new Clustal(); else if(format == DCSE_FORMAT) return new DCSE(); else if(format == PHYLIP_FORMAT_INTERLEAVED) return new Phylip(false, false); else if(format == PHYLIP_FORMAT_SEQUENTIAL) return new Phylip(false, true); else if(format == PAML_FORMAT_INTERLEAVED) return new Phylip(true, false); else if(format == PAML_FORMAT_SEQUENTIAL) return new Phylip(true, true); else if(format == NEXUS_FORMAT) return new NexusIOSequence(); else throw Exception("Format " + format + " is not supported for alignment input."); } OSequence* IoSequenceFactory::createWriter(const string& format) throw (Exception) { if(format == FASTA_FORMAT) return new Fasta(); else if(format == MASE_FORMAT) return new Mase(); else throw Exception("Format " + format + " is not supported for output."); } OAlignment* IoSequenceFactory::createAlignmentWriter(const string& format) throw (Exception) { if (format == FASTA_FORMAT) return new Fasta(); else if (format == MASE_FORMAT) return new Mase(); else if (format == PHYLIP_FORMAT_INTERLEAVED) return new Phylip(false, false); else if (format == PHYLIP_FORMAT_SEQUENTIAL) return new Phylip(false, true); else if (format == PAML_FORMAT_INTERLEAVED) return new Phylip(true, false); else if (format == PAML_FORMAT_SEQUENTIAL) return new Phylip(true, true); else throw Exception("Format " + format + " is not 
supported for output."); } bpp-seq-2.1.0/src/Bpp/Seq/Io/NexusTools.h000644 000000 000000 00000005645 12147656566 017776 0ustar00rootroot000000 000000 // // File: NexusTools.h // Created by: Julien Dutheil // Created on: Wed May 27 19:30 2009 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _NEXUSTOOLS_H_ #define _NEXUSTOOLS_H_ // From the STL: #include #include namespace bpp { /** * @brief Tools for parsing Nexus files. 
/**
 * @brief Tools for parsing Nexus files.
 *
 * The Nexus format is described in the following paper:
 * Maddison D, Swofford D, and Maddison W (1997), _Syst Biol_ 46(4):590-621
 *
 * Both helpers are static; the class only serves as a scope for them.
 *
 * @author Julien Dutheil
 */
class NexusTools
{
public:
  /**
   * @brief Fetch the next meaningful line from a stream.
   *
   * @param input The input stream.
   * @return A string containing the next line in the file which is not empty and is not a comment line.
   */
  static std::string getNextNonCommentLine(std::istream& input);

  /**
   * @brief Parse the next command name within a block.
   *
   * @param input     [in]  The input stream.
   * @param name      [out] Will contain the name of the command.
   * @param arguments [out] Will contain the arguments of the command, as raw data. The arguments will not be parsed.
   * @param lineBrk   [in]  Tells whether line breaks should be preserved in the arguments.
   * @return Whether a command was found in the current block.
   * @throw IOException In case of bad format.
   */
  static bool getNextCommand(std::istream& input, std::string& name, std::string& arguments, bool lineBrk = true) throw (IOException);
};
/**
 * @brief The phd sequence file format from phred software.
 *
 * This class reads DNA SequenceWithQuality objects from phd files produced by
 * the phred program from the University of Washington.
 *
 * NOTE(review): in this copy of the file the template arguments of every
 * std::vector (declarations and the usage example below) have been lost.
 * The documentation of nextSequence() speaks of "a vector of int" for the
 * positions; the element types must be confirmed against the
 * implementation file before this header can compile.
 *
 * @par Usage
 *
 * @code
 * // Creating a SequenceWithQuality object
 * DNA alpha;
 * SequenceWithQuality seq(&alpha);
 * std::vector pos;
 *
 * // Create a PhredPhd parser
 * PhredPhd pp;
 *
 * // Opening the file
 * std::ifstream in("my_sequence.phd");
 *
 * // Read the sequence
 * pp.nextSequence(in, seq, pos);
 *
 * // Close the file
 * in.close();
 * @endcode
 *
 * @author Sylvain Gaillard
 */
class PhredPhd: public ISequenceStream {
  public:

    /**
     * @brief Build a new PhredPhd object.
     */
    PhredPhd() {}

    virtual ~PhredPhd() {}

  public:
    /**
     * @name The ISequenceStream interface.
     *
     * @{
     */
    bool nextSequence(
        std::istream& input,
        Sequence& seq
        ) const throw (Exception);
    /** @} */

    /**
     * @brief Read a SequenceWithQuality from stream and store chromatographic positions
     *
     * A more complete parser that reads a SequenceWithQuality and stores
     * the position of each base call on the chromatogram in a vector of
     * int.
     *
     * @param input The stream to read.
     * @param seq The sequence to fill.
     * @param pos The vector of positions to fill.
     * @throw Exception IOException and Sequence related exceptions.
     */
    bool nextSequence(
        std::istream& input,
        Sequence& seq,
        std::vector& pos
        ) const throw (Exception);

    /**
     * @name The IOFormat interface.
     *
     * @{
     */
    const std::string getDataType() const { return "SequenceWithQuality"; };
    const std::string getFormatName() const { return "phd file"; };
    const std::string getFormatDescription() const {
      return "Sequences following the phd format as describe in the phred documentation.";
    }
    /** @} */

  private:
    /**
     * @brief Global file parser
     *
     * @param input The stream to read
     * @param name The string to store the sequence name
     * @param sequence The string to store the sequence
     * @param qual The vector to store qualities
     * @param pos The vector to store positions
     */
    bool parseFile_(std::istream& input, std::string& name, std::string& sequence, std::vector& qual, std::vector& pos) const;

    /**
     * @brief Parse the DNA part of the file
     *
     * Read the DNA part until `END_DNA' or EOF.
     *
     * @param input The stream to read
     * @param sequence The string to store the sequence
     * @param qual The vector to store qualities
     * @param pos The vector to store positions
     */
    bool parseDNA_(std::istream& input, std::string& sequence, std::vector& qual, std::vector& pos) const;
};
Bio++ Development Team (2010) Julien.Dutheil@univ-montp2.fr This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Stockholm.h" #include "../StringSequenceTools.h" #include #include #include using namespace bpp; using namespace std; /******************************************************************************/ void Stockholm::writeAlignment(ostream& output, const SiteContainer& sc) const throw (Exception) { if (!output) throw IOException("Stockholm::writeAlignment: can't write to ostream output"); output << "# STOCKHOLM 1.0" << endl; // Loop for all general comments for (size_t i = 0; i < sc.getGeneralComments().size(); ++i) { output << "#=GF CC " << sc.getGeneralComments()[i] << endl; } // Main loop : for all sequences in vector container vector names = sc.getSequencesNames(); size_t maxSize = 0; for(unsigned int i = 0; i < names.size(); ++i) { names[i] = TextTools::removeWhiteSpaces(names[i]); if (names[i].size() > maxSize) maxSize = names[i].size(); } if (maxSize > 255) maxSize = 255; for (size_t i = 0; i < sc.getNumberOfSequences(); ++i) { output << TextTools::resizeRight(names[i], maxSize) << " " << sc.getSequence(i).toString() << endl; } output << "//" << endl; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Fasta.h000644 000000 000000 00000015641 12147656566 016706 0ustar00rootroot000000 000000 // // File: Fasta.h // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPP_SEQ_IO_FASTA_H_ #define _BPP_SEQ_IO_FASTA_H_ #include "AbstractISequence.h" #include "AbstractIAlignment.h" #include "AbstractOSequence.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSequenceContainer.h" #include "ISequenceStream.h" #include "OSequenceStream.h" #include "SequenceFileIndex.h" namespace bpp { /** * @brief The fasta sequence file format. * * Read and write from/to Fasta files. */ class Fasta: public AbstractISequence, public AbstractIAlignment, public AbstractOSequence, public virtual ISequenceStream, public virtual OSequenceStream { private: /** * @brief The maximum number of chars to be written on a line. 
*/ unsigned int charsByLine_; // Number of char by line (output only) bool checkNames_; // If names must be checked in container bool extended_; // If using HUPO-PSI extensions bool strictNames_; // If name is between '>' and first space public: /** * @brief Build a new Fasta object. * * @param charsByLine Number of character per line when writing files. * @param checkSequenceNames Tells if the names in the file should be checked for unicity (slower, in o(n*n) where n is the number of sequences). * @param extended Tells if we should read general comments and sequence comments in HUPO-PSI format. * @param strictSequenceNames Tells if the sequence names should be restricted to the characters between '>' and the first blank one. */ Fasta(unsigned int charsByLine = 100, bool checkSequenceNames = true, bool extended = false, bool strictSequenceNames = false): charsByLine_(charsByLine), checkNames_(checkSequenceNames), extended_(extended), strictNames_(strictSequenceNames) {} // Class destructor virtual ~Fasta() {} public: /** * @name The AbstractISequence interface. * * @{ */ void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception); /** @} */ /** * @name The AbstractIAlignment interface. * * @{ */ void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception) { appendSequencesFromStream(input, sc); //This may raise an exception if sequences are not aligned! } /** @} */ /** * @name The OSequence interface. * * @{ */ void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception); void writeSequences(const std::string& path, const SequenceContainer& sc, bool overwrite=true) const throw (Exception) { AbstractOSequence::writeSequences(path, sc, overwrite); } /** @} */ /** * @name The IOSequence interface. 
* * @{ */ const std::string getFormatName() const { return "FASTA file"; }; const std::string getFormatDescription() const { return "Sequence name (preceded by >) in one line, sequence content, no comments"; } /** @} */ /** * @name The ISequenceStream interface. * * @{ */ bool nextSequence(std::istream& input, Sequence& seq) const throw (Exception); /** @} */ /** * @name The OSequenceStream interface. * * @{ */ void writeSequence(std::ostream& output, const Sequence& seq) const throw (Exception); /** @} */ /** * @return true if the names are to be checked when reading sequences from files. */ bool checkNames() const { return checkNames_; } /** * @brief Tell whether the sequence names should be checked when reading from files. * * @param yn whether the sequence names should be checked when reading from files. */ void checkNames(bool yn) { checkNames_ = yn; } /** * @return true if the sequence name is restricted to be between '>' and the first space character. */ bool strictNames() const { return strictNames_; } /** * @brief Tell wethed the sequence name should be restrected to the first non blank characters. * * @param yn whether the sequence names should be restrected. 
*/ void strictNames(bool yn) { strictNames_ = yn; } /** * @brief The SequenceFileIndex class for Fasta format * @author Sylvain Gaillard */ class FileIndex: SequenceFileIndex { public: FileIndex(): index_(), fileSize_(0) {} ~FileIndex() {} void build(const std::string& path) throw (Exception); std::streampos getSequencePosition(const std::string& id) const throw (Exception); size_t getNumberOfSequences() const throw (Exception) { return index_.size(); } /** * @brief Read the index from a file */ void read(const std::string& path) throw (Exception); /** * @brief Write the index to a file */ void write(const std::string& path) throw (Exception); /** * @brief Get a sequence given its ID */ void getSequence(const std::string& seqid, Sequence& seq, const std::string& path) const; private: std::map index_; std::streampos fileSize_; }; }; } //end of namespace bpp. #endif // _BPP_SEQ_IO_FASTA_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp000644 000000 000000 00000010744 12147656566 022726 0ustar00rootroot000000 000000 // // File: BppOAlphabetIndex2Format.cpp // Created by: Julien Dutheil // Created on: Thursday Februar 07th, 19:26 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BppOAlphabetIndex2Format.h" #include "BppOAlphabetIndex1Format.h" #include "../Alphabet/AlphabetTools.h" #include "../AlphabetIndex/BLOSUM50.h" #include "../AlphabetIndex/GranthamAAChemicalDistance.h" #include "../AlphabetIndex/MiyataAAChemicalDistance.h" #include "../AlphabetIndex/SimpleIndexDistance.h" #include "../AlphabetIndex/AAIndex2Entry.h" #include "../AlphabetIndex/AlphabetIndex1.h" #include #include #include #include using namespace bpp; using namespace std; AlphabetIndex2* BppOAlphabetIndex2Format::read(const std::string& description) throw (Exception) { if (description != "None") { string name; map args; KeyvalTools::parseProcedure(description, name, args); if (verbose_) ApplicationTools::displayResult(message_, description); //Currently, only protein indices are supported: if (!AlphabetTools::isProteicAlphabet(alphabet_)) throw Exception("BppOAlphabetIndex2Format::read. 
This index is only supported with a protein alphabet."); if (name == "Blosum50") { return new BLOSUM50(); } else if (name == "Grantham") { bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true); GranthamAAChemicalDistance* M = new GranthamAAChemicalDistance(); M->setSymmetric(sym); if (!sym) M->setPC1Sign(true); return M; } else if (name == "Miyata") { bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true); MiyataAAChemicalDistance* M = new MiyataAAChemicalDistance(); M->setSymmetric(sym); return M; } else if (name == "Diff") { string index1Desc = ApplicationTools::getStringParameter("index1", args, "None", "", true); bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true); BppOAlphabetIndex1Format index1Reader(alphabet_, "" , false); AlphabetIndex1* index1 = index1Reader.read(index1Desc); if (index1) { SimpleIndexDistance* M = new SimpleIndexDistance(index1); M->setSymmetric(sym); return M; } else { throw Exception("BppOAlphabetIndex2Format::read. Diff: index1 should be provided."); } } else if (name == "User") { bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true); string aax2FilePath = ApplicationTools::getAFilePath("file", args, true, true, "", false); ifstream aax2File(aax2FilePath.c_str(), ios::in); AAIndex2Entry* M = new AAIndex2Entry(aax2File, sym); aax2File.close(); return M; } else { throw Exception("Invalid index2 '" + name + "'."); } } else { return 0; } } bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h000644 000000 000000 00000006541 12147656566 023650 0ustar00rootroot000000 000000 // // File: BppOSequenceStreamReaderFormat.h // Created by: Julien Dutheil // Created on: Tuesday November 20th, 13:27 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPPOSEQUENCESTREAMREADERFORMAT_H_ #define _BPPOSEQUENCESTREAMREADERFORMAT_H_ #include "IoSequenceFactory.h" #include "ISequenceStream.h" namespace bpp { /** * @brief Sequence I/O in BppO format. * * Creates a new ISequenceStream object according to * distribution description syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). 
* */ class BppOSequenceStreamReaderFormat: public virtual IOFormat { private: bool verbose_; std::map unparsedArguments_; public: BppOSequenceStreamReaderFormat(bool verbose = true): verbose_(verbose), unparsedArguments_() {} virtual ~BppOSequenceStreamReaderFormat() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "Sequence stream reader"; } /** * @brief Read a ISequenceStream object from a string. * * @param description A string describing the reader in the keyval syntax. * @return A new ISequenceStream object according to options specified. * @throw Exception if an error occured. */ ISequenceStream* read(const std::string& description) throw (Exception); /** * @return The arguments and their unparsed values from the last call of the read function, if there are any. */ virtual const std::map& getUnparsedArguments() const { return unparsedArguments_; } }; } //end of namespace bpp. #endif //_BPPOSEQUENCESTREAMREADERFORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.h000644 000000 000000 00000007600 12147656566 022370 0ustar00rootroot000000 000000 // // File: BppOAlphabetIndex2Format.h // Created by: Julien Dutheil // Created on: Thursday Februar 07th, 19:26 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
/**
 * @brief AlphabetIndex2 I/O in BppO format.
 *
 * Enables the instantiation of AlphabetIndex2 objects according to
 * the BppO syntax (see the Bio++ Program Suite
 * manual for a detailed description of this syntax).
 */
class BppOAlphabetIndex2Format:
  public virtual IOFormat
{
private:
  // Alphabet the indices are built for; the pointer is copied as-is by the
  // copy constructor and assignment operator below.
  const Alphabet* alphabet_;
  // Text describing what the index is intended for.
  std::string message_;
  // Whether messages should be printed while parsing.
  bool verbose_;

public:
  /**
   * @param alphabet The alphabet for which indices should be built.
   * The alphabet will be used to check that the instantiated index is compatible.
   * @param message Some text describing what the index is intended for.
   * @param verbose Tell if some messages should be printed while parsing.
   */
  BppOAlphabetIndex2Format(const Alphabet* alphabet, const std::string& message, bool verbose = true):
    alphabet_(alphabet), message_(message), verbose_(verbose) {}

  /**
   * @brief Copy constructor: member-wise copy (the alphabet pointer is shared).
   */
  BppOAlphabetIndex2Format(const BppOAlphabetIndex2Format& format):
    alphabet_(format.alphabet_),
    message_(format.message_),
    verbose_(format.verbose_) {}

  /**
   * @brief Assignment operator: member-wise copy (the alphabet pointer is shared).
   */
  BppOAlphabetIndex2Format& operator=(const BppOAlphabetIndex2Format& format)
  {
    alphabet_ = format.alphabet_;
    message_  = format.message_;
    verbose_  = format.verbose_;
    return *this;
  }

  virtual ~BppOAlphabetIndex2Format() {}

public:
  const std::string getFormatName() const { return "BppO"; }

  const std::string getFormatDescription() const { return "Bpp Options format."; }

  const std::string getDataType() const { return "AlphabetIndex2"; }

  /**
   * @brief Read an AlphabetIndex2 object from a string.
   *
   * @param description A string describing the index in the keyval syntax.
   * @return A new AlphabetIndex2 object according to options specified.
   * @throw Exception if an error occurred.
   */
  AlphabetIndex2* read(const std::string& description) throw (Exception);
};
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPPOSEQUENCEWRITERFORMAT_H_ #define _BPPOSEQUENCEWRITERFORMAT_H_ #include "IoSequenceFactory.h" namespace bpp { /** * @brief Sequence I/O in BppO format. * * Creates a new OSequence object according to * distribution description syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). * */ class BppOSequenceWriterFormat: public virtual IOFormat { private: bool verbose_; std::map unparsedArguments_; public: BppOSequenceWriterFormat(bool verbose = true): verbose_(verbose), unparsedArguments_() {} virtual ~BppOSequenceWriterFormat() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "Sequence reader"; } /** * @brief Read a OSequence object from a string. 
* * @param description A string describing the reader in the keyval syntax. * @return A new OSequence object according to options specified. * @throw Exception if an error occured. */ OSequence* read(const std::string& description) throw (Exception); /** * @return The arguments and their unparsed values from the last call of the read function, if there are any. */ virtual const std::map& getUnparsedArguments() const { return unparsedArguments_; } }; } //end of namespace bpp. #endif //_BPPOSEQUENCEWRITERFORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/PhredPoly.h000644 000000 000000 00000005570 12147656566 017556 0ustar00rootroot000000 000000 // // File: PhredPoly.h // Created by: Sylvain Gaillard // Created on: Fri Oct 31 2008 // /* Copyright or © or Copr. CNRS, (October 31, 2008) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _PHREDPOLY_H_ #define _PHREDPOLY_H_ #include "ISequenceStream.h" #include "../Sequence.h" namespace bpp { /** * @brief The poly sequence file format from phred software. * * This class read DNA sequence from poly files produced by the phred program * from the University of Washington. * For now, only read raw sequences and do a basic filter on heterozygous site. */ class PhredPoly: public ISequenceStream { protected: double ratio_; public: /** * @brief Build a new PhredPoly object. */ PhredPoly(double ratio = 0.8); virtual ~PhredPoly() {} public: /** * @name The AbstractISequence interface. * * @{ */ bool nextSequence(std::istream& input, Sequence& seq) const throw (Exception); /** @} */ /** * @name The IOSequence interface. * * @{ */ const std::string getDataType() const { return "Sequence"; }; const std::string getFormatName() const { return "poly file"; }; const std::string getFormatDescription() const { return "Sequences following the poly format as describe in the phred documentation."; } /** @} */ }; } //end of namespace bpp #endif // _PHREDPOLY_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/AbstractISequence.h000644 000000 000000 00000013000 12147656566 021200 0ustar00rootroot000000 000000 // // File: AbstractISequence.h // Created by: Julien Dutheil // Created on: ? // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTISEQUENCE_H_ #define _ABSTRACTISEQUENCE_H_ #include "ISequence.h" #include "../Container/VectorSequenceContainer.h" #include "../Alphabet/Alphabet.h" // From the STL: #include #include #include namespace bpp { /** * @brief Partial implementation of the ISequence interface. */ class AbstractISequence: public virtual ISequence { public: AbstractISequence() {} virtual ~AbstractISequence() {} public: /** * @name ISequence methods: * * @{ */ public: /** * @brief Add sequences to a container from a stream. * * @param input The input stream to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. 
*/ virtual void readSequences(std::istream& input, SequenceContainer& sc) const throw (Exception) { appendSequencesFromStream(input, sc); } /** * @brief Add sequences to a container from a file. * * @param path The path to the file to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void readSequences(const std::string& path, SequenceContainer& sc) const throw (Exception) { appendSequencesFromFile(path, sc); } virtual #if defined(NO_VIRTUAL_COV) SequenceContainer* #else VectorSequenceContainer* #endif readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception) { return readSequencesFromStream(input, alpha); } virtual #if defined(NO_VIRTUAL_COV) SequenceContainer* #else VectorSequenceContainer* #endif readSequences(const std::string& path , const Alphabet* alpha) const throw (Exception) { return readSequencesFromFile(path, alpha); } /** @} */ protected: /** * @brief Append sequences to a container from a stream. * * This is the unique method to implement! * * @param input The input stream to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception) = 0; /** * @brief Append sequences to a container from a file. * * @param path The path to the file to read. * @param sc The sequence container to update. * @throw Exception If the file is not in the specified format. */ virtual void appendSequencesFromFile(const std::string& path, SequenceContainer& sc) const throw (Exception) { std::ifstream input(path.c_str(), std::ios::in); appendSequencesFromStream(input, sc); input.close(); } /** * @brief Read sequences from a stream. * * @param input The input stream to read. * @param alpha The alphabet to use. * @return A sequence container. * @throw Exception If the file is not in the specified format. 
*/ virtual VectorSequenceContainer* readSequencesFromStream(std::istream& input, const Alphabet* alpha) const throw (Exception) { VectorSequenceContainer* vsc = new VectorSequenceContainer(alpha); appendSequencesFromStream(input, *vsc); return vsc; } /** * @brief Append sequences to a container from a file. * * @param path The path to the file to read. * @param alpha The alphabet to use. * @throw Exception If the file is not in the specified format. */ virtual VectorSequenceContainer* readSequencesFromFile(const std::string& path , const Alphabet* alpha) const throw (Exception) { VectorSequenceContainer* vsc = new VectorSequenceContainer(alpha); appendSequencesFromFile(path, *vsc); return vsc; } }; } //end of namespace bpp. #endif //_ABSTRACTISEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/GenBank.h000644 000000 000000 00000005234 12147656566 017152 0ustar00rootroot000000 000000 // // File: GenBank.h // Created by: Julien Dutheil // Created on: Tue Oct 2 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _GENBANK_H_ #define _GENBANK_H_ #include "AbstractISequence.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSequenceContainer.h" namespace bpp {

/**
 * @brief Reader for the GenBank sequence file format.
 *
 * Only the raw sequences are read for now; features are not supported yet.
 */
class GenBank:
  public AbstractISequence
{
public:
  /**
   * @brief Build a new GenBank object.
   */
  GenBank() {}

  virtual ~GenBank() {}

  /**
   * @name The AbstractISequence interface.
   *
   * @{
   */
  void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception);
  /** @} */

  /**
   * @name The IOSequence interface.
   *
   * @{
   */
  const std::string getFormatName() const
  {
    return "GenBank file";
  }

  const std::string getFormatDescription() const
  {
    return "Sequences following the GenBank data base format.";
  }
  /** @} */
};

} //end of namespace bpp. #endif // _GENBANK_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/ISequenceStream.h000644 000000 000000 00000005042 12147656566 020677 0ustar00rootroot000000 000000 // // File ISequenceStream.h // Author: Sylvain Gaillard // Created: 18/08/2009 // /* Copyright or © or Copr.
Bio++ Development Team, (August 18, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ISEQUENCESTREAM_H_ #define _ISEQUENCESTREAM_H_ #include "IoSequenceStream.h" #include "../Sequence.h" #include "../Alphabet/Alphabet.h" #include namespace bpp {

/**
 * @brief The ISequenceStream interface.
 *
 * Contract for readers that take their sequences from an input stream,
 * one sequence per call.
 *
 * @author Sylvain Gaillard
 */
class ISequenceStream:
  public virtual IOSequenceStream
{
public:
  ISequenceStream() {}

  virtual ~ISequenceStream() {}

  /**
   * @brief Read one sequence from a stream.
   *
   * @param input The stream to read.
   * @param seq   The sequence to fill.
   * @return true if a sequence was read, false otherwise.
   * @throw Exception IOException and Sequence related exceptions.
   */
  virtual bool nextSequence(std::istream& input, Sequence& seq) const throw (Exception) = 0;
};

} //end of namespace bpp. #endif // _ISEQUENCESTREAM_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp000644 000000 000000 00000011540 12147656566 023170 0ustar00rootroot000000 000000 // // File: BppOAlignmentReaderFormat.cpp // Created by: Julien Dutheil // Created on: Friday September 15th, 22:06 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BppOAlignmentReaderFormat.h" #include "Mase.h" #include "Phylip.h" #include "Fasta.h" #include "Clustal.h" #include "Dcse.h" #include "NexusIoSequence.h" #include #include #include using namespace bpp; using namespace std; IAlignment* BppOAlignmentReaderFormat::read(const std::string& description) throw (Exception) { unparsedArguments_.clear(); string format = ""; KeyvalTools::parseProcedure(description, format, unparsedArguments_); auto_ptr iAln; if (format == "Mase") { iAln.reset(new Mase()); } else if (format == "Phylip") { bool sequential = true, extended = true; string split = " "; if (unparsedArguments_.find("order") != unparsedArguments_.end()) { if (unparsedArguments_["order"] == "sequential") sequential = true; else if (unparsedArguments_["order"] == "interleaved") sequential = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["order"] + "' for argument 'Phylip#order' is unknown. " + "Default used instead: sequential."); } else ApplicationTools::displayWarning("Argument 'Phylip#order' not found. 
Default used instead: sequential."); if (unparsedArguments_.find("type") != unparsedArguments_.end()) { if (unparsedArguments_["type"] == "extended") { extended = true; split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false); if (split == "spaces") split = " "; else if (split == "tab") split = "\t"; else throw Exception("Unknown option for Phylip#split: " + split); } else if (unparsedArguments_["type"] == "classic") extended = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " + "Default used instead: extended."); } else ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: extended."); iAln.reset(new Phylip(extended, sequential, 100, true, split)); } else if (format == "Fasta") { bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, false); bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, false); iAln.reset(new Fasta(100, true, extended, strictNames)); } else if (format == "Clustal") { unsigned int extraSpaces = ApplicationTools::getParameter("extraSpaces", unparsedArguments_, 0, "", true, false); iAln.reset(new Clustal(true, extraSpaces)); } else if (format == "Dcse") { iAln.reset(new DCSE()); } else if (format == "Nexus") { iAln.reset(new NexusIOSequence()); } else { throw Exception("Sequence format '" + format + "' unknown."); } return iAln.release(); } bpp-seq-2.1.0/src/Bpp/Seq/Io/PhredPhd.cpp000644 000000 000000 00000011240 12147656566 017670 0ustar00rootroot000000 000000 // // File: PhredPhd.cpp // Created by: Sylvain Gaillard // Created on: Wed Nov 5 2008 // /* Copyright or © or Copr. CNRS, (November 5, 2008) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "PhredPhd.h" #include #include using namespace bpp; /******************************************************************************/ //PhredPhd::PhredPhd() {} /******************************************************************************/ bool PhredPhd::nextSequence(std::istream& input, Sequence& seq) const throw (Exception) { std::vector pos; return nextSequence(input, seq, pos); } /******************************************************************************/ bool PhredPhd::nextSequence(std::istream& input, Sequence& seq, std::vector& pos) const throw (Exception) { if (!input) { throw IOException ("PhredPhd::read: fail to open stream"); } bool flag = false; std::string name, sequence = ""; // Initialization std::vector q, p; flag = parseFile_(input, name, sequence, q, p); // Sequence creation if(name == "") throw Exception("PhredPhd::read: sequence without name!"); seq.setName(name); seq.setContent(sequence); try { SequenceWithQuality& sq = dynamic_cast(seq); sq.setQualities(q); } catch (...) 
{ } return flag; } /******************************************************************************/ bool PhredPhd::parseFile_(std::istream& input, std::string& name, std::string& sequence, std::vector& qual, std::vector& pos) const { bool readSeqFlag = false; std::string temp; // Read sequence info // Main loop : for all lines while (!input.eof()) { std::getline(input, temp, '\n'); // Copy current line in temporary string StringTokenizer st(temp, " "); if (st.hasMoreToken()) { if (st.getToken(0) == "BEGIN_SEQUENCE") { name = st.getToken(1); } std::string flag = st.getToken(0); while (flag != "END_SEQUENCE" && !input.eof()) { getline(input, temp, '\n'); StringTokenizer st2(temp, " "); if (st2.hasMoreToken()) { flag = st2.getToken(0); } if (flag == "BEGIN_DNA") { readSeqFlag = parseDNA_(input, sequence, qual, pos); break; // End the whole loop after parsing DNA } } } } return readSeqFlag; } /******************************************************************************/ bool PhredPhd::parseDNA_(std::istream& input, std::string& sequence, std::vector& qual, std::vector& pos) const { bool readSeqFlag = false; std::string line_buffer; std::string flag; sequence.clear(); qual.clear(); pos.clear(); while (flag != "END_DNA" && !input.eof()) { std::getline(input, line_buffer, '\n'); StringTokenizer st(line_buffer, " "); if (st.hasMoreToken()) { flag = TextTools::toUpper(st.getToken(0)); if (st.numberOfRemainingTokens() == 3) { sequence += flag; qual.push_back(TextTools::toInt(st.getToken(1))); pos.push_back(TextTools::toInt(st.getToken(2))); readSeqFlag = true; } } } return readSeqFlag; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Clustal.cpp000644 000000 000000 00000011356 12147656566 017611 0ustar00rootroot000000 000000 // // File: Clustal.cpp // Created by: Julien Dutheil // Created on: ? // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Clustal.h" #include #include #include using namespace bpp; // From the STL: #include #include using namespace std; void Clustal::appendAlignmentFromStream(std::istream& input, SiteContainer & sc) const throw (Exception) { // Checking the existence of specified file if (!input) { throw IOException ("Clustal::read : fail to open file"); } const Alphabet * alpha = sc.getAlphabet(); vector sequences; string lineRead(""); Comments comments(1); comments[0] = FileTools::getNextLine(input); // First line gives file generator. lineRead = FileTools::getNextLine(input); // This is the first sequence of the first block. string::size_type beginSeq = 0; unsigned int count = 0; for (size_t i = lineRead.size(); i > 0; i--) { char c = lineRead[i-1]; if (c == ' ') { count++; if (count == nbSpacesBeforeSeq_) { beginSeq = i - 1 + nbSpacesBeforeSeq_; break; } } else count = 0; } if (beginSeq == 0) throw IOException("Clustal::read. Bad intput file."); unsigned int countSequences = 0; //Read first sequences block: bool test = true; do { sequences.push_back(BasicSequence(TextTools::removeSurroundingWhiteSpaces(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)), lineRead.substr(beginSeq), alpha)); getline(input, lineRead, '\n'); countSequences++; test = !TextTools::isEmpty(lineRead) && !TextTools::isEmpty(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)); } while (input && test); // Read other blocks lineRead = FileTools::getNextLine(input); // Read first sequence of next block. while (!TextTools::isEmpty(lineRead)) { // Read next block: for (unsigned int i = 0; i < countSequences; ++i) { // Complete sequences if (TextTools::isEmpty(lineRead)) throw IOException("Clustal::read. Bad intput file."); sequences[i].append(lineRead.substr(beginSeq)); getline(input, lineRead, '\n'); } //At this point, lineRead is the first line after the current block. 
lineRead = FileTools::getNextLine(input); } for (unsigned int i = 0; i < countSequences; ++i) sc.addSequence(sequences[i], checkNames_); sc.setGeneralComments(comments); } void Clustal::writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) { output << "CLUSTAL W (1.81) multiple sequence alignment" << endl; output << endl; if (sc.getNumberOfSequences() == 0) return; vector text; size_t length = 0; for (size_t i = 0; i < sc.getNumberOfSequences(); ++i ) { const Sequence& seq = sc.getSequence(i); if (seq.getName().size() > length) length = seq.getName().size(); text.push_back(sc.getSequence(i).toString()); } length += nbSpacesBeforeSeq_; for (unsigned int j = 0; j < text[0].size(); j += charsByLine_) { for (unsigned int i = 0; i < sc.getNumberOfSequences(); ++i ) { output << TextTools::resizeRight(sc.getSequence(i).getName(), length); output << text[i].substr(j, charsByLine_) << endl; } output << endl; } } bpp-seq-2.1.0/src/Bpp/Seq/Io/Phylip.h000644 000000 000000 00000015326 12147656566 017115 0ustar00rootroot000000 000000 // // File: Phylip.h // Created by: Julien Dutheil // Created on: Mon Oct 27 12:22:56 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _PHYLIP_H_ #define _PHYLIP_H_ #include "AbstractIAlignment.h" #include "AbstractOAlignment.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSequenceContainer.h" #include "../Container/AlignedSequenceContainer.h" // From the STL: #include namespace bpp {

/**
 * @brief The Phylip & co format.
 *
 * An AlignedSequenceContainer is used instead of a VectorSequenceContainer.
 *
 * This format is described on the Phylip package documentation website:
 * http://evolution.genetics.washington.edu/phylip/doc/sequence.html
 */
class Phylip :
  public AbstractIAlignment,
  public AbstractOAlignment,
  public virtual ISequence
{
private:

  /* This class allows two kinds of Phylip format:
   * traditional, with names limited to 10 chars,
   * and 'extended', defined by PAML, with names separated from sequences by at least 6 white spaces.
   */
  bool extended_;

  /* Tells if sequences are in the sequential or the interleaved format. */
  bool sequential_;

  /**
   * @brief The maximum number of chars to be written on a line.
   */
  unsigned int charsByLine_;

  bool checkNames_;

  std::string namesSplit_;

public:
  /**
   * @brief Build a new Phylip file reader.
   *
   * @param extended If true, sequences with names longer than 10 characters are allowed.
   * @param sequential If false, sequences are supposed to be interleaved.
   * @param charsByLine The number of bases to display in a row.
   * @param checkSequenceNames Tell if the names in the file should be checked for unicity (slower, in o(n*n) where n is the number of sequences).
   * @param split The string to use to split sequence name from content (only for 'extended' format). This will typically be "  " (two spaces) or "\t" (a tabulation).
   */
  Phylip(bool extended = true, bool sequential = true, unsigned int charsByLine = 100, bool checkSequenceNames = true, const std::string& split = "  "):
    extended_(extended), sequential_(sequential), charsByLine_(charsByLine), checkNames_(checkSequenceNames), namesSplit_(split) {}

  virtual ~Phylip() {}

public:

  /**
   * @name The AbstractIAlignment interface.
   *
   * @{
   */
  void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception);
  /** @} */

  /**
   * @name The ISequence interface.
   *
   * As a SiteContainer is a subclass of SequenceContainer, we hereby implement the ISequence
   * interface by forwarding to readAlignment().
   *
   * @{
   */
  virtual SequenceContainer* readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception)
  {
    return readAlignment(input, alpha);
  }

  virtual SequenceContainer* readSequences(const std::string& path, const Alphabet* alpha) const throw (Exception)
  {
    return readAlignment(path, alpha);
  }
  /** @} */

  /**
   * @return The number of sequences contained in the specified file.
   *
   * This method parses the first line of the phylip file.
   * @param path The path of the file to parse.
   */
  unsigned int getNumberOfSequences(const std::string& path) const throw (IOException);

  /**
   * @name The OSequence interface.
   *
   * @{
   */
  void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception);

  void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite) const throw (Exception)
  {
    AbstractOAlignment::writeAlignment(path, sc, overwrite);
  }
  /** @} */

  /**
   * @name The IOSequence interface.
   *
   * @{
   */
  const std::string getFormatName() const;
  const std::string getFormatDescription() const;
  /** @} */

  /**
   * @return true if the names are to be checked when reading sequences from files.
   */
  bool checkNames() const { return checkNames_; }

  /**
   * @brief Tell whether the sequence names should be checked when reading from files.
   *
   * @param yn whether the sequence names should be checked when reading from files.
   */
  void checkNames(bool yn) { checkNames_ = yn; }

  /**
   * @return The string used to split sequence name from content.
   */
  const std::string& getSplit() const { return namesSplit_; }

  /**
   * @param split The string to be used to split sequence name from content.
   */
  void setSplit(const std::string& split) { namesSplit_ = split; }

protected:
  // Reading tools:
  const std::vector<std::string> splitNameAndSequence(const std::string& s) const throw (Exception);
  void readSequential (std::istream& in, SiteContainer& asc) const throw (Exception);
  void readInterleaved(std::istream& in, SiteContainer& asc) const throw (Exception);
  // Writing tools:
  std::vector<std::string> getSizedNames(const std::vector<std::string>& names) const;
  void writeSequential (std::ostream& out, const SequenceContainer& sc, int charsByLine) const;
  void writeInterleaved(std::ostream& out, const SequenceContainer& sc, int charsByLine) const;
};

} //end of namespace bpp. #endif //_PHYLIP_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/OSequence.h000644 000000 000000 00000010404 12147656566 017527 0ustar00rootroot000000 000000 // // File: OSequence.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Tue Aug 21 2003 // /* Copyright or © or Copr.
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _OSEQUENCE_H_ #define _OSEQUENCE_H_ #include "../Container/SequenceContainer.h" #include "../Container/SiteContainer.h" #include "IoSequence.h" #include namespace bpp {

/**
 * @brief The OSequence interface.
 *
 * Declares the basic operations needed to write sequences, either to a
 * stream or to a file.
 */
class OSequence:
  public virtual IOSequence
{
public:
  OSequence() {}

  virtual ~OSequence() {}

  /**
   * @brief Write a container to a stream.
   *
   * @param output The output stream where to write.
   * @param sc     The container to write.
   * @throw Exception If the file is not in the specified format.
   */
  virtual void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception) = 0;

  /**
   * @brief Write a container to a file.
   *
   * @param path      The path to the file to write.
   * @param sc        The container to write.
   * @param overwrite If true the sequences are written at the beginning of
   *                  the file instead of being appended, and any previous
   *                  content is lost.
   * @throw Exception If the file is not in the specified format.
   */
  virtual void writeSequences(const std::string& path, const SequenceContainer & sc, bool overwrite) const throw (Exception) = 0;
};

/**
 * @brief The OAlignment interface.
 *
 * Declares the basic operations needed to write alignments, either to a
 * stream or to a file.
 */
class OAlignment:
  public virtual IOSequence
{
public:
  OAlignment() {}

  virtual ~OAlignment() {}

  /**
   * @brief Write a container to a stream.
   *
   * @param output The output stream where to write.
   * @param sc     The container to write.
   * @throw Exception If the file is not in the specified format.
   */
  virtual void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) = 0;

  /**
   * @brief Write a container to a file.
   *
   * @param path      The path to the file to write.
   * @param sc        The container to write.
   * @param overwrite If true the sequences are written at the beginning of
   *                  the file instead of being appended, and any previous
   *                  content is lost.
   * @throw Exception If the file is not in the specified format.
   */
  virtual void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite) const throw (Exception) = 0;
};

} //end of namespace bpp.
#endif // _OSEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/ISequence.h000644 000000 000000 00000010353 12147656566 017524 0ustar00rootroot000000 000000 // // File: ISequence.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: Wed Jul 30 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _ISEQUENCE_H_ #define _ISEQUENCE_H_ #include "IoSequence.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/SiteContainer.h" #include //From the STL: #include #include namespace bpp { /** * @brief The ISequence interface. * * This interface defines the basic methods for reading sequences from a file. * NB: This interface is effective only if the VIRTUAL_COV option is enabled (default behavior). */ class ISequence : public virtual IOSequence { public: ISequence() {} virtual ~ISequence() {} public: /** * @brief Create a new container from a stream. * * @param input The input stream to read. * @param alpha The alphabet to be associated to the container. * @return A new SequenceContainer object. * @throw Exception If the file is not in the specified format. */ virtual SequenceContainer* readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception) = 0; /** * @brief Create a new container from a file. * * @param path The path to the file to read. * @param alpha The alphabet to be associated to the container. * @return A new SequenceContainer object. * @throw Exception If the file is not in the specified format. */ virtual SequenceContainer* readSequences(const std::string& path, const Alphabet* alpha) const throw (Exception) = 0; }; /** * @brief The IAlignment interface. * * This interface defines the basic methods for reading aligned sequences from a file. */ class IAlignment: public virtual IOSequence { public: IAlignment() {} virtual ~IAlignment() {} public: /** * @brief Create a new container from a stream. * * @param input The input stream to read. * @param alpha The alphabet to be associated to the container. * @return A new SiteContainer object. * @throw Exception If the file is not in the specified format. */ virtual SiteContainer* readAlignment(std::istream& input, const Alphabet* alpha) const throw (Exception) = 0; /** * @brief Create a new container from a file. 
* * @param path The path to the file to read. * @param alpha The alphabet to be associated to the container. * @return A new SiteContainer object. * @throw Exception If the file is not in the specified format. */ virtual SiteContainer* readAlignment(const std::string& path, const Alphabet* alpha) const throw (Exception) = 0; }; } //end of namespace bpp. #endif // _ISEQUENCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp000644 000000 000000 00000011004 12147656566 023235 0ustar00rootroot000000 000000 // // File: BppOAlignmentWriterFormat.cpp // Created by: Julien Dutheil // Created on: Friday September 15th, 22:12 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BppOAlignmentWriterFormat.h" #include "Fasta.h" #include "Mase.h" #include "Clustal.h" #include "Phylip.h" #include "Stockholm.h" #include #include #include using namespace bpp; using namespace std; OAlignment* BppOAlignmentWriterFormat::read(const std::string& description) throw (Exception) { unparsedArguments_.clear(); string format = ""; KeyvalTools::parseProcedure(description, format, unparsedArguments_); unsigned int ncol = ApplicationTools::getParameter("length", unparsedArguments_, 100, "", true, false); auto_ptr oAln; if (format == "Fasta") { oAln.reset(new Fasta(ncol)); } else if (format == "Mase") { oAln.reset(new Mase(ncol)); } else if (format == "Clustal") { oAln.reset(new Clustal(ncol)); } else if (format == "Phylip") { bool sequential = true, extended = true; string split = " "; if (unparsedArguments_.find("order") != unparsedArguments_.end()) { if (unparsedArguments_["order"] == "sequential") sequential = true; else if (unparsedArguments_["order"] == "interleaved") sequential = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["order"] + "' for argument 'Phylip#order' is unknown. " + "Default used instead: sequential."); } else ApplicationTools::displayWarning("Argument 'Phylip#order' not found. 
Default used instead: sequential."); if (unparsedArguments_.find("type") != unparsedArguments_.end()) { if (unparsedArguments_["type"] == "extended") { extended = true; split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false); if (split == "spaces") split = " "; else if (split == "tab") split = "\t"; else throw Exception("Unknown option for Phylip#split: " + split); } else if (unparsedArguments_["type"] == "classic") extended = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " + "Default used instead: extended."); } else ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: extended."); oAln.reset(new Phylip(extended, sequential, ncol, true, split)); } else if (format == "Stockholm") { oAln.reset(reinterpret_cast(new Stockholm())); } else { throw Exception("Sequence format '" + format + "' unknown."); } return oAln.release(); } bpp-seq-2.1.0/src/Bpp/Seq/Io/IoSequenceStream.h000644 000000 000000 00000004011 12147656566 021051 0ustar00rootroot000000 000000 // // File IOSequenceStream.h // Author: Sylvain Gaillard // Created: 19/08/2009 // /* Copyright or © or Copr. CNRS, (August 19, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _IOSEQUENCESTREAM_H_ #define _IOSEQUENCESTREAM_H_ #include namespace bpp { /** * @brief The IOSequenceStream interface. * * Interface for streaming sequences. * * @author Sylvain Gaillard */ class IOSequenceStream: public virtual IOFormat { public: IOSequenceStream() {} virtual ~IOSequenceStream() {} }; } //end of namespace bpp. #endif // _IOSEQUENCESTREAM_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/Clustal.h000644 000000 000000 00000011374 12147656566 017256 0ustar00rootroot000000 000000 // // File: Clustal.h // Created by: Julien Dutheil // Created on: ? // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _CLUSTAL_H_ #define _CLUSTAL_H_ #include "AbstractIAlignment.h" #include "AbstractOAlignment.h" #include "../Container/SiteContainer.h" // From the STL: #include namespace bpp { /** * @brief The clustal sequence file format. * * An AlignedSequenceContainer object is used instead of a VectorSequenceContainer. */ class Clustal : public AbstractIAlignment, public AbstractOAlignment, public virtual ISequence { private: bool checkNames_; unsigned int nbSpacesBeforeSeq_; unsigned int charsByLine_; public: /** * @brief Build a new Clustal object. * * @param checkSequenceNames Tell if the names in the file should be checked for unicity (slower, in o(n*n) where n is the number of sequences). * @param nbExtraSpacesBeforeSeq Specify the number of extra space characters separating the sequence name form content. 
The default is 5 (hence 6 spaces in total) for backward compatibility, using 0 will not allow for any space in the sequence names. * @param charsByLine Number of character per line when writing file. */ Clustal(bool checkSequenceNames = true, unsigned int nbExtraSpacesBeforeSeq = 5, unsigned int charsByLine = 100) throw (Exception) : checkNames_(checkSequenceNames), nbSpacesBeforeSeq_(nbExtraSpacesBeforeSeq + 1), charsByLine_(charsByLine) {} virtual ~Clustal() {} public: /** * @name The AbstractIAlignment interface. * * @{ */ void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception); /** @} */ /** * @name The ISequence interface. * * As a SiteContainer is a subclass of SequenceContainer, we hereby implement the ISequence * interface by downcasting the interface. * * @{ */ virtual SequenceContainer* readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception) { return readAlignment(input, alpha); } virtual SequenceContainer* readSequences(const std::string& path, const Alphabet* alpha) const throw (Exception) { return readAlignment(path, alpha); } /** @} */ /** * @name The AbstractOAlignment interface. * * @{ */ void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception); void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite = true) const throw (Exception) { AbstractOAlignment::writeAlignment(path, sc, overwrite); } /** @} */ /** * @name The IOSequence interface. * * @{ */ const std::string getFormatName() const { return "Clustal"; } const std::string getFormatDescription() const { return "The Clustal alignment tool output format."; } /** @} */ /** * @return true if the names are to be checked when reading sequences from files. */ bool checkNames() const { return checkNames_; } /** * @brief Tell whether the sequence names should be checked when reading from files. * * @param yn whether the sequence names should be checked when reading from files. 
*/ void checkNames(bool yn) { checkNames_ = yn; } }; } // end of namespace bpp. #endif // _CLUSTAL_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/Phylip.cpp000644 000000 000000 00000024563 12147656566 017453 0ustar00rootroot000000 000000 // // File: Phylip.cpp // Created by: Julien Dutheil // Created on: Mon Oct 27 12:22:56 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Phylip.h" #include "../Container/SequenceContainerTools.h" #include #include #include using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ const std::vector Phylip::splitNameAndSequence(const std::string& s) const throw (Exception) { vector v(2); if (extended_) { string::size_type index = s.find(namesSplit_); if(index == string::npos) throw Exception("No sequence name found."); v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, index)); v[1] = TextTools::removeFirstWhiteSpaces (s.substr(index + namesSplit_.size())); //There may be more than 2 white spaces. } else { v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, 10)); v[1] = s.substr(10); } return v; } /******************************************************************************/ void Phylip::readSequential(std::istream& in, SiteContainer& asc) const throw (Exception) { string temp; //Ignore first line: getline(in, temp, '\n'); // Copy current line in temporary string temp = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in)); string name = ""; string seq = ""; while (!in.eof()) { // Read each sequence: vector v; bool hasName = true; try { v = splitNameAndSequence(temp); } catch (Exception & e) { hasName = false; } if (hasName) { // a new sequence is found: if (!TextTools::isEmpty(name)) //If this is not the first sequence! { // Add the previous sequence to the container: asc.addSequence(BasicSequence(name, seq, asc.getAlphabet()), checkNames_); } name = v[0]; seq = v[1]; } else { //No sequence name found. if (TextTools::isEmpty(name)) throw Exception("First sequence in file has no name!"); seq += TextTools::removeWhiteSpaces(temp); } //while(!TextTools::isEmpty(temp)) //{ // //Sequences are separated by at least one blank line: // getline(in, temp, '\n'); // read next line in file. 
// seq += TextTools::removeWhiteSpaces(temp); //} //end of this sequence: temp = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in)); } // Add last sequence: asc.addSequence(BasicSequence(name, seq, asc.getAlphabet()), checkNames_); } /******************************************************************************/ void Phylip::readInterleaved(std::istream& in, SiteContainer& asc) const throw (Exception) { string temp; //Read first line: getline(in, temp, '\n'); // Copy current line in temporary string StringTokenizer st(temp); unsigned int nbSequences = TextTools::to(st.nextToken()); //int nbSites = TextTools::toInt(st.nextToken()); temp = FileTools::getNextLine(in); vector names, seqs; // Read first block: for (unsigned int i = 0; i < nbSequences && !in.eof() && !TextTools::isEmpty(temp); i++) { vector v = splitNameAndSequence(temp); names.push_back(v[0]); seqs.push_back(v[1]); getline(in, temp, '\n'); // read next line in file. } //Then read all other blocks: temp = FileTools::getNextLine(in); while (!in.eof()) { for (unsigned int i = 0; i < names.size(); i++) { if (TextTools::isEmpty(temp)) throw IOException("Phylip::readInterleaved. Bad file,there are not the same number of sequence in each block."); seqs[i] += TextTools::removeWhiteSpaces(temp); getline(in, temp, '\n'); // read next line in file. 
} temp = FileTools::getNextLine(in); } for (unsigned int i = 0; i < names.size(); i++) { asc.addSequence(BasicSequence(names[i], seqs[i], asc.getAlphabet()), checkNames_); } } /******************************************************************************/ void Phylip::appendAlignmentFromStream(std::istream& input, SiteContainer& vsc) const throw (Exception) { // Checking the existence of specified file if (!input) { throw IOException ("Phylip::read: fail to open file"); } if(sequential_) readSequential (input, vsc); else readInterleaved(input, vsc); } /******************************************************************************/ unsigned int Phylip::getNumberOfSequences(const std::string& path) const throw (IOException) { // Checking the existence of specified file ifstream file (path.c_str(), ios::in); if (! file) { throw IOException ("Phylip::getNumberOfSequences: failed to open file"); } string firstLine = FileTools::getNextLine(file); StringTokenizer st(firstLine, " \t"); istringstream iss(st.nextToken()); int nb; iss >> nb; file.close(); return nb; } /******************************************************************************/ std::vector Phylip::getSizedNames(const std::vector& names) const { vector sizedNames(names.size()); if (extended_) { //Add 6 white spaces to the larger name and align other names. //First, determine the size of the wider name: size_t sizeMax = 0; for (size_t i = 0; i < names.size(); i++) if (names[i].size() > sizeMax) sizeMax = names[i].size(); //Quite easy ;-) Now update all lengths: for (size_t i = 0; i < names.size(); i++) sizedNames[i] = TextTools::resizeRight(names[i], sizeMax) + namesSplit_; } else { //We trunc all names to ten characters: for(unsigned int i = 0; i < names.size(); i++) sizedNames[i] = TextTools::resizeRight(names[i], 10); cout << "Warning: names have been truncated to 10 characters. They may be ambiguous sequence names then." 
<< endl; } return sizedNames; } /******************************************************************************/ void Phylip::writeSequential(std::ostream& out, const SequenceContainer& sc, int charsByLine) const { //cout << "Write sequential" << endl; size_t numberOfSites = sc.getSequence(sc.getSequencesNames()[0]).size() * sc.getAlphabet()->getStateCodingSize(); out << sc.getNumberOfSequences() << " " << numberOfSites << endl; vector seqNames = sc.getSequencesNames(); vector names = getSizedNames(seqNames); for (size_t i = 0; i < seqNames.size(); i++) { vector seq = TextTools::split(sc.toString(seqNames[i]), charsByLine); out << names[i] << seq[0] << endl; for (unsigned int j = 1; j < seq.size(); j++) { out << string(names[i].size(), ' ') << seq[j] << endl; } out << endl; } } void Phylip::writeInterleaved(std::ostream& out, const SequenceContainer& sc, int charsByLine) const { //cout << "Write interleaved;" << endl; size_t numberOfSites = sc.getSequence(sc.getSequencesNames()[0]).size() * sc.getAlphabet()->getStateCodingSize(); out << sc.getNumberOfSequences() << " " << numberOfSites << endl; vector seqNames = sc.getSequencesNames(); vector names = getSizedNames(seqNames); //Split sequences: vector< vector > seqs(sc.getNumberOfSequences()); for (size_t i = 0; i < seqNames.size(); i++) { seqs[i] = TextTools::split(sc.toString(seqNames[i]), charsByLine); } //Write first block: for (size_t i = 0; i < names.size(); i++) { out << names[i] << seqs[i][0] << endl; } out << endl; //Write other blocks: for (size_t j = 1; j < seqs[0].size(); j++) { for (unsigned int i = 0; i < sc.getNumberOfSequences(); i++) { out << seqs[i][j] << endl; } out << endl; } } /******************************************************************************/ void Phylip::writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) { //First must check if all sequences are aligned: if (sc.getNumberOfSequences() == 0) throw Exception("Phylip::write. 
SequenceContainer appear to contain no sequence."); // Checking the existence of specified file, and possibility to open it in write mode if (!output) { throw IOException ("Phylip::write : failed to open file"); } if (sequential_) writeSequential (output, sc, charsByLine_); else writeInterleaved(output, sc, charsByLine_); } /******************************************************************************/ const std::string Phylip::getFormatName() const { return "Phylip file, " + string(extended_ ? "extended," : "") + string(sequential_ ? "sequential" : "interleaved"); } /******************************************************************************/ const std::string Phylip::getFormatDescription() const { return "Phylip file format, sequential and interleaved. PAML extension also supported."; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Mase.h000644 000000 000000 00000020712 12147656566 016530 0ustar00rootroot000000 000000 // // File: Mase.h // Created by: Guillaume Deuchst // Julien Dutheil // Created on: ? // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _MASE_H_ #define _MASE_H_ #include "AbstractISequence.h" #include "AbstractIAlignment.h" #include "AbstractOSequence.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/VectorSequenceContainer.h" #include #include namespace bpp { /** * @brief A class to store information from the header of Mase files. 
* * @author Julien Dutheil */ class MaseHeader { private: mutable std::map trees_; mutable std::map > siteSelections_; mutable std::map > sequenceSelections_; public: MaseHeader(): trees_(), siteSelections_(), sequenceSelections_() {} virtual ~MaseHeader() {} public: size_t getNumberOfTrees() const { return trees_.size(); } size_t getNumberOfSiteSelections() const { return siteSelections_.size(); } size_t getNumberOfSequenceSelections() const { return sequenceSelections_.size(); } std::vector getTreeNames() const { return MapTools::getKeys(trees_); } std::vector getSiteSelectionNames() const { return MapTools::getKeys(siteSelections_); } std::vector getSequenceSelectionNames() const { return MapTools::getKeys(sequenceSelections_); } const std::string& getTree(const std::string& name) const throw (Exception) { if (trees_.find(name) != trees_.end()) { return trees_[name]; } else { throw Exception("MaseHeader::getTree. No tree with name " + name); } } const MultiRange& getSiteSelection(const std::string& name) const throw (Exception) { if (siteSelections_.find(name) != siteSelections_.end()) { return siteSelections_[name]; } else { throw Exception("MaseHeader::getSiteSelection. No site selection with name " + name); } } const std::vector& getSequenceSelection(const std::string& name) const throw (Exception) { if (sequenceSelections_.find(name) != sequenceSelections_.end()) { return sequenceSelections_[name]; } else { throw Exception("MaseHeader::getSequenceSelection. No sequence selection with name " + name); } } void setTree(const std::string& name, const std::string& tree) { trees_[name] = tree; } void setSiteSelection(const std::string& name, const MultiRange& ranges) { siteSelections_[name] = ranges; } void setSequenceSelection(const std::string& name, const std::vector& set) { sequenceSelections_[name] = set; } }; /** * @brief The mase sequence file format. 
* * In addition to traditional read and write method, this class offers overloaded method * with MaseHeader objects, dedicated to header information storage. If used, then the header * of the mase file will be parsed accordingly. Otherwise, the header lines will be stored * as general comments. * * @see MaseTools for alternative way of parsing headers. */ class Mase: public AbstractISequence, public AbstractIAlignment, public AbstractOSequence { private: /** * @brief The maximum number of chars to be written on a line. */ unsigned int charsByLine_; bool checkNames_; public : /** * @brief Build a new Mase object. * * @param charsByLine Number of character per line when writing files. * @param checkSequenceNames Tell if the names in the file should be checked for unicity (slower, in o(n*n) where n is the number of sequences). */ Mase(unsigned int charsByLine = 100, bool checkSequenceNames = true): charsByLine_(charsByLine), checkNames_(checkSequenceNames) {} // Class destructor virtual ~Mase() {} public: /** * @name Reading method including header: * * @{ */ VectorSequenceContainer* readMeta(std::istream& input, const Alphabet* alpha, MaseHeader& header) const throw (Exception) { readHeader_(input, header); return AbstractISequence::readSequences(input, alpha); } VectorSequenceContainer* readMeta(std::string& path, const Alphabet* alpha, MaseHeader& header) const throw (Exception) { std::ifstream input(path.c_str(), std::ios::in); VectorSequenceContainer* sc = readMeta(input, alpha, header); input.close(); return sc; } /** @} */ /** * @name The AbstractISequence interface. * * @{ */ void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception); /** @} */ /** * @name The AbstractIAlignment interface. * * @{ */ void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception) { appendSequencesFromStream(input, sc); //This might cast an exception if sequences are not aligned! 
} /** @} */ /** * @name The OSequence interface. * * @{ */ void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception); void writeSequences(const std::string& path, const SequenceContainer& sc, bool overwrite = true) const throw (Exception) { AbstractOSequence::writeSequences(path, sc, overwrite); } /** @} */ /** * @name Writing methods including header: * * @{ */ void writeMeta(std::ostream& output, const SequenceContainer& sc, const MaseHeader& header) const throw (Exception) { writeHeader_(output, header); writeSequences(output, sc); } void writeMeta(const std::string& path, const SequenceContainer& sc, const MaseHeader& header, bool overwrite = true) const throw (Exception) { // Open file in specified mode std::ofstream output(path.c_str(), overwrite ? (std::ios::out) : (std::ios::out | std::ios::app)); writeHeader_(output, header); writeSequences(output, sc); output.close(); } /** @} */ /** * @name The IOSequence interface. * * @{ */ const std::string getFormatName() const { return "MASE file"; } const std::string getFormatDescription() const { return "Optional file comments (preceeded by ;;), sequence comments (preceeded by ;), sequence name, sequence"; } /** @} */ /** * @return true if the names are to be checked when reading sequences from files. */ bool checkNames() const { return checkNames_; } /** * @brief Tell whether the sequence names should be checked when reading from files. * * @param yn whether the sequence names should be checked when reading from files. */ void checkNames(bool yn) { checkNames_ = yn; } private: void readHeader_(std::istream& input, MaseHeader& header) const throw (Exception); void writeHeader_(std::ostream& output, const MaseHeader& header) const; }; } //end of namespace bpp. 
#endif // _MASE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/MaseTools.h000644 000000 000000 00000013437 12147656566 017557 0ustar00rootroot000000 000000 // // File: MaseTools.h // Created by: Julien Dutheil // Created on: Tue Apr 1 09:16:59 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _MASETOOLS_H_ #define _MASETOOLS_H_ #include "../Container/SequenceContainer.h" #include "../Container/OrderedSequenceContainer.h" #include "../Container/SequenceContainerTools.h" #include "../Container/SiteContainer.h" #include "../Container/SiteContainerTools.h" #include namespace bpp { /** * @brief Utilitary methods that deal with the Mase format. * * This class particularily covers the Mase+ format, which allows * site and sequence selection. * Mase+ tags are in the header of the mase file, which is stored * in the 'general comment' section of sequence containers. * Most of the methods here hence work on the general comments associated * to a container. */ class MaseTools { public: /** * @brief Get a site selection from a Mase+ header file. * * @param maseFileHeader The header of the mase+ file as comments lines. * @param setName The name of the set to retrieve. * @throw IOException If the specified set is not found. */ static SiteSelection getSiteSet(const Comments& maseFileHeader, const std::string& setName) throw (IOException); /** * @brief Get a sequence selection from a Mase+ header file. * * @param maseFileHeader The header of the mase+ file as comments lines. * @param setName The name of the set to retrieve. * @throw IOException If the specified set is not found. */ static SequenceSelection getSequenceSet(const Comments& maseFileHeader, const std::string& setName) throw (IOException); /** * @brief Create a new container corresponding to a site set given in the mase+ format. * * A new VectorSiteContainer is created, whose destruction is up to the user. * The container passed as argument must have 'general comments' in the mase+ format. * This function calls the getSiteSet() function on the comments and then calls for * SiteContainerTools::getSelectedSites() on the selection. * * @param sequences The container to get the sites from. * @param setName The name of the set to retrieve. * @throw IOException If the specified set is not found. 
*/ static SiteContainer* getSelectedSites(const SiteContainer& sequences, const std::string& setName) throw (IOException); /** * @brief Create a new container corresponding to a site set given in the mase+ format. * * A new VectorSequenceContainer is created, whose destruction is up to the user. * The container passed as argument must have 'general comments' in the mase+ format. * This function calls the getSequenceSet() function on the comments and then calls for * SiteContainerTools::getSelectedSequences() on the selection. * * @param sequences The container to get the sequence from. * @param setName The name of the set to retrieve. * @throw IOException If the specified set is not found. */ static SequenceContainer* getSelectedSequences(const OrderedSequenceContainer& sequences, const std::string & setName) throw (IOException); /** * @brief Get a list of all available site selections. * * @param maseHeader Comments as described in the Mase+ format specification. * @return A vector of selection names. */ static std::map getAvailableSiteSelections(const Comments & maseHeader); /** * @brief Get a list of all available sequences selections. * * @param maseHeader Comments as described in the Mase+ format specification. * @return A vector of selection names. */ static std::map getAvailableSequenceSelections(const Comments & maseHeader); /** * @brief Get the phase of a given coding region from a mase+ header. * * Look for a /codon_start tag with a phase indice and a site selection with name setName. * * @param maseFileHeader Comments in Mase+ format. * @param setName a cds site selection name. * @return 1,2 or 3. * @throw Exception If no corresponding tag found in file. */ static size_t getPhase(const Comments & maseFileHeader, const std::string &setName) throw (Exception); }; } //end of namespace bpp. 
#endif //_MASETOOLS_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h000644 000000 000000 00000006407 12147656566 022643 0ustar00rootroot000000 000000 // // File: BppOAlignmentReaderFormat.h // Created by: Julien Dutheil // Created on: Friday September 15th, 22:04 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _BPPOALIGNMENTREADERFORMAT_H_ #define _BPPOALIGNMENTREADERFORMAT_H_ #include "IoSequenceFactory.h" namespace bpp { /** * @brief Sequence I/O in BppO format. * * Creates a new IAlignment object according to * distribution description syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). * */ class BppOAlignmentReaderFormat: public virtual IOFormat { private: bool verbose_; std::map unparsedArguments_; public: BppOAlignmentReaderFormat(bool verbose = true): verbose_(verbose), unparsedArguments_() {} virtual ~BppOAlignmentReaderFormat() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "Alignment reader"; } /** * @brief Read a IAlignment object from a string. * * @param description A string describing the reader in the keyval syntax. * @return A new IAlignment object according to options specified. * @throw Exception if an error occured. */ IAlignment* read(const std::string& description) throw (Exception); /** * @return The arguments and their unparsed values from the last call of the read function, if there are any. */ virtual const std::map& getUnparsedArguments() const { return unparsedArguments_; } }; } //end of namespace bpp. #endif //_BPPOALIGNMENTREADERFORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/SequenceFileIndex.h000644 000000 000000 00000005101 12147656566 021176 0ustar00rootroot000000 000000 // // File: SequenceFileIndex.h // Author: Sylvain Gaillard // Created: 19/04/2010 10:16:13 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SEQUENCEFILEINDEX_H_ #define _SEQUENCEFILEINDEX_H_ #include #include namespace bpp { /** * @brief Index to retrieve Sequence in a file * * This class is designed to build an in-memory index of a Sequence file in * order to retrieve Sequence given its ID. * * @author Sylvain Gaillard */ class SequenceFileIndex { public: virtual ~SequenceFileIndex() {} /** * @brief Build the index given a path to the file. */ virtual void build(const std::string& path) throw (Exception) = 0; /** * @brief Get the position of a Sequence given its ID. 
*/ virtual std::streampos getSequencePosition(const std::string& id) const throw (Exception) = 0; /** * @brief Get the number of sequences */ virtual size_t getNumberOfSequences() const throw (Exception) = 0; }; } #endif // _SEQUENCEFILEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.cpp000644 000000 000000 00000005130 12147656566 024174 0ustar00rootroot000000 000000 // // File: BppOSequenceReaderFormat.cpp // Created by: Julien Dutheil // Created on: Tuesday November 20th, 13:27 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BppOSequenceStreamReaderFormat.h" #include "Fasta.h" #include #include #include using namespace bpp; using namespace std; ISequenceStream* BppOSequenceStreamReaderFormat::read(const std::string& description) throw (Exception) { unparsedArguments_.clear(); string format = ""; KeyvalTools::parseProcedure(description, format, unparsedArguments_); auto_ptr iSeq; if (format == "Fasta") { bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, false); bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, false); iSeq.reset(new Fasta(100, true, extended, strictNames)); } else { throw Exception("Sequence format '" + format + "' unknown."); } return iSeq.release(); } bpp-seq-2.1.0/src/Bpp/Seq/Io/GenBank.cpp000644 000000 000000 00000006252 12147656566 017506 0ustar00rootroot000000 000000 // // File: GenBank.cpp // Created by: Julien Dutheil // Created on: Tue Oct 2 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "GenBank.h" #include #include using namespace bpp; using namespace std; /****************************************************************************************/ void GenBank::appendSequencesFromStream(std::istream& input, SequenceContainer& vsc) const throw (Exception) { if (!input) { throw IOException ("GenBank::read: fail to open file"); } string temp, name, sequence = ""; // Initialization // Main loop : for all file lines while (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string if(temp.size() >= 9 && temp.substr(0,9) == "ACCESSION") { name = TextTools::removeSurroundingWhiteSpaces(temp.substr(10)); StringTokenizer st(name, " "); name = st.nextToken(); //cout << name << endl; } if (temp.size() >=6 && temp.substr(0,6) == "ORIGIN") { sequence = ""; getline(input, temp, '\n'); // Copy current line in temporary string while (!input.eof() && temp.size() > 2 && temp.substr(0,2) != "//") { sequence += TextTools::removeWhiteSpaces(temp.substr(10)); getline(input, temp, '\n'); // Copy current line in temporary string } if(name == "") throw Exception("GenBank::read(). 
Sequence with no ACCESSION number!"); Sequence* seq = new BasicSequence(name, sequence, vsc.getAlphabet()); vsc.addSequence(*seq, true); name = ""; } } } /****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/Dcse.h000644 000000 000000 00000006352 12147656566 016525 0ustar00rootroot000000 000000 // // File: DCSE.h // Created by: Julien Dutheil // Created on: Wed Mar 3 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _DCSE_H_ #define _DCSE_H_ #include "AbstractIAlignment.h" #include "../Sequence.h" #include "../Container/SequenceContainer.h" #include "../Container/AlignedSequenceContainer.h" namespace bpp { /** * @brief Support for the Dedicated Comparative Sequence Editor format. * * Only the sequence information is retrieved. * All structural information is dropped for now. * * A description of this format may be found here: * http://www.psb.ugent.be/rRNA/help/formats/aliformat.html */ class DCSE : public AbstractIAlignment, public virtual ISequence { public: DCSE() {}; virtual ~DCSE() {}; public: /** * @name The AbstractIAlignment interface. * * @{ */ void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception); /** @} */ /** * @name The ISequence interface. * * As a SiteContainer is a subclass of SequenceContainer, we hereby implement the ISequence * interface by downcasting the interface. * * @{ */ virtual SequenceContainer* readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception) { return readAlignment(input, alpha); } virtual SequenceContainer* readSequences(const std::string& path, const Alphabet* alpha) const throw (Exception) { return readAlignment(path, alpha); } /** @} */ /** * @name The IOSequence interface. * * @{ */ const std::string getFormatName() const; const std::string getFormatDescription() const; /** @} */ }; } //end of namespace bpp. #endif // _DCSE_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/NexusTools.cpp000644 000000 000000 00000007330 12147656566 020322 0ustar00rootroot000000 000000 // // File: NexusTools.cpp // Created by: Julien Dutheil // Created on: Wed May 27 19:30 2009 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "NexusTools.h" #include #include using namespace bpp; using namespace std; std::string NexusTools::getNextNonCommentLine(std::istream& input) { string line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input)); bool test = true; unsigned int countOpen = 0; unsigned int countClosed = 0; while(test) { if (line[0] == '[') { countOpen++; } if (line[line.size() - 1] == ']') { countClosed++; } if(countOpen > 0) line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input)); if(countOpen == countClosed) test = false; } return line; } bool NexusTools::getNextCommand(std::istream& input, std::string& name, std::string& arguments, bool lineBrk) throw (IOException) { // Checking if the stream is readable if (! input) { throw IOException ("NexusTools::getNextCommand(). Failed to read from stream"); } string line = TextTools::removeSurroundingWhiteSpaces(getNextNonCommentLine(input)); if (TextTools::startsWith(line, "BEGIN")) { return false; } // Check if the command stands on one line: bool commandComplete = TextTools::endsWith(line, ";"); if (commandComplete) line = line.substr(0, line.size() - 1); // Get the command name, as the first block: string::size_type limit = line.find(" "); if (limit == string::npos) { name = line; arguments = ""; if (commandComplete) { //Command with no argument: return true; } } else { name = line.substr(0, limit); arguments = line.substr(limit + 1); } //Then parse the next lines: while(!commandComplete) { if (input.eof()) { throw IOException ("NexusTools::getNextCommand(). 
Reached end of file before the end of the command could be found"); } line = TextTools::removeSurroundingWhiteSpaces(getNextNonCommentLine(input)); commandComplete = TextTools::endsWith(line, ";"); if (commandComplete) line = line.substr(0, line.size() - 1); if(lineBrk) arguments += "\n"; arguments += line; } return true; } bpp-seq-2.1.0/src/Bpp/Seq/Io/Mase.cpp000644 000000 000000 00000026676 12147656566 017102 0ustar00rootroot000000 000000 // // File Mase.cpp // Author : Guillaume Deuchst // Julien Dutheil // Last modification: Tuesday August 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Mase.h" #include "../StringSequenceTools.h" using namespace bpp; using namespace std; /****************************************************************************************/ void Mase::appendSequencesFromStream(std::istream& input, SequenceContainer& vsc) const throw (Exception) { if (!input) { throw IOException ("Mase::read : fail to open file"); } // Initialization Comments seqComments, fileComments; string temp, name, sequence = ""; bool comments = false; // Get current general comments is VectorSequenceContainer fileComments = vsc.getGeneralComments(); // Main loop : for all file lines while (!input.eof()) { getline(input, temp, '\n'); // Copy current line in temporary string // If first character is ; if (temp[0] == ';') { // If second character is also ; if (temp[1] == ';') { // File comments isolation temp.erase(0,2); // Characters ;; deletion if(temp != "") fileComments.push_back(temp); } else { // If a name and a sequence were founded if ((name != "") && (sequence != "")) { // New sequence creation, and addition in existing VectorSequenceContainer vsc.addSequence(BasicSequence(name, sequence, seqComments, vsc.getAlphabet()), checkNames_); name = ""; sequence = ""; seqComments.clear(); } // Sequence commentaries isolation temp.erase(temp.begin()); // Character ; deletion if (temp != "") seqComments.push_back(temp); comments = true; } } else { // If sequence commentaries were just isolated if (comments) { // Sequence name isolation name = temp; comments = false; } else sequence += temp; // Sequence isolation } } // Addition of the last sequence in file if ((name != "") && (sequence != "")) { vsc.addSequence(BasicSequence(name, sequence, seqComments, vsc.getAlphabet()), checkNames_); } // Set new general comments in VectorSequenceContainer (old + new comments) vsc.setGeneralComments(fileComments); } 
/****************************************************************************************/ void Mase::writeSequences(ostream& output, const SequenceContainer& sc) const throw (Exception) { // Checking the existence of specified file, and possibility to open it in write mode if (!output) { throw IOException ("Mase::write : failed to open file"); } Comments comments = sc.getGeneralComments(); // Writing all general comments in file if (comments.size() == 0) { output << ";;" << endl; } for (unsigned int i = 0 ; i < comments.size() ; i++) { output << ";;" << comments[i] << endl; } string seq, temp = ""; // Initialization // Main loop : for all sequences vector names = sc.getSequencesNames(); for (unsigned int i = 0 ; i < names.size() ; i ++) { comments = sc.getComments(names[i]); // Writing all sequence comments in file // If no comments are associated with current sequence, an empy commentary line will be writed if (comments.size() == 0) { output << ";" << endl; } else { for (unsigned int j = 0 ; j < comments.size() ; j++) { output << ";" << comments[j] << endl; } } // Sequence name writing output << names[i] << endl; // Sequence cutting to specified characters number per line seq = sc.toString(names[i]); while (seq != "") { if (seq.size() > charsByLine_) { temp = seq; temp.erase(temp.begin() + charsByLine_ , temp.end()); output << temp << endl; seq.erase(seq.begin(), seq.begin() + charsByLine_); } else { output << seq << endl; seq = ""; } } } } /****************************************************************************************/ void Mase::readHeader_(std::istream& input, MaseHeader& header) const throw (Exception) { do { //Check if the line is a header line: if (input.peek() == ';') { char c; input.get(c); if (input.peek() == ';') { input.get(c); string line = FileTools::getNextLine(input); //Check the type of line... 
//Site selection: string::size_type index = line.find("# of"); if (index < line.npos) { StringTokenizer st(string(line.begin() + index + 4, line.end()), " \t=;"); st.nextToken(); //skip next word: may be 'regions' or 'segments' or else ;-) unsigned int numberOfSegments = TextTools::to(st.nextToken()); string name = st.unparseRemainingTokens(); //Then look for the set definition: MultiRange siteSelection; while (siteSelection.size() < numberOfSegments) { line = FileTools::getNextLine(input); if (line[0] != ';' || line[1] != ';') throw Exception("Mase::readHeader_(): corrupted file, site selection " + name + " is incomplete. Aborting."); line = line.substr(2); StringTokenizer st2(line); while (st2.hasMoreToken()) { StringTokenizer st3(st2.nextToken(), ","); unsigned int begin = TextTools::to(st3.nextToken()); unsigned int end = TextTools::to(st3.nextToken()); //WARNING!!! In the mase+ format, sites numerotation is 1-based, including, while ranges are 0-based, [a, b[: siteSelection.addRange(Range(begin - 1, end)); } if (siteSelection.size() > numberOfSegments) throw Exception("Mase::readHeader_(): incorrected file, found " + TextTools::toString(siteSelection.size()) + "segments while expected " + TextTools::toString(numberOfSegments)); } header.setSiteSelection(name, siteSelection); } else { //Sequence selection: index = line.find("@ of"); if (index < line.npos) { StringTokenizer st(line.substr(index + 4), " \t=;"); st.nextToken(); //skip next word: may be 'sequences' or else ;-) unsigned int numberOfSequences = TextTools::to(st.nextToken()); string name = st.unparseRemainingTokens(); //The look for the set definition: vector sequenceSelection; while (sequenceSelection.size() < numberOfSequences) { line = FileTools::getNextLine(input); if (line[0] != ';' || line[1] != ';') throw Exception("Mase::readHeader_(): corrupted file, sequence selection " + name + " is incomplete. 
Aborting."); line = line.substr(2); StringTokenizer st2(line, ", "); while (st2.hasMoreToken()) { unsigned int pos = TextTools::to(st2.nextToken()); //WARNING!!! In the mase+ format, sequence numerotation is 1-based sequenceSelection.push_back(pos); } if (sequenceSelection.size() > numberOfSequences) throw Exception("Mase::readHeader_(): incorrected file, found " + TextTools::toString(sequenceSelection.size()) + "sequences while expected " + TextTools::toString(numberOfSequences)); } header.setSequenceSelection(name, sequenceSelection); } else { //Tree: index = line.find("$"); if (index < line.npos) { string name = TextTools::removeSurroundingWhiteSpaces(line.substr(index + 1)); //Here we stop if the line ends with a ";" string tree = ""; do { line = FileTools::getNextLine(input); if (line[0] != ';' || line[1] != ';') throw Exception("Mase::readHeader_(): corrupted file, tree " + name + " is incomplete. Aborting."); line = TextTools::removeSurroundingWhiteSpaces(line.substr(2)); tree += line; } while (! 
TextTools::endsWith(line, ";")); header.setTree(name, tree); } } } } else { input.putback(c); break; } } } while (true); } /****************************************************************************************/ void Mase::writeHeader_(std::ostream& output, const MaseHeader& header) const { //Write trees: vector treeNames = header.getTreeNames(); for (size_t i = 0; i < treeNames.size(); ++i) { output << ";;$ " + treeNames[i] << endl; output << ";;" + header.getTree(treeNames[i]); output << endl; } //Write site selections: vector siteSelectionNames = header.getSiteSelectionNames(); for (size_t i = 0; i < siteSelectionNames.size(); ++i) { MultiRange ranges = header.getSiteSelection(siteSelectionNames[i]); output << ";;# of segments=" << ranges.size() << " " << siteSelectionNames[i] << endl; output << ";;"; for (unsigned int j = 0; j < ranges.size(); ++j) { output << " " << (ranges.getRange(j).begin() + 1) << "," << ranges.getRange(j).end(); if ((j + 1) % 10 == 0) output << endl << ";;"; } output << endl; } //Write sequence selections: vector sequenceSelectionNames = header.getSequenceSelectionNames(); for (size_t i = 0; i < sequenceSelectionNames.size(); ++i) { vector set = header.getSequenceSelection(sequenceSelectionNames[i]); output << ";;@ of species=" << set.size() << " " << sequenceSelectionNames[i] << endl; output << ";;"; for (unsigned int j = 0; j < set.size(); ++j) { output << " " << set[j]; if ((j + 1) % 10 == 0) output << endl << ";;"; } output << endl; } } /****************************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/OSequenceStream.h000644 000000 000000 00000004703 12147656566 020710 0ustar00rootroot000000 000000 // // File OSequenceStream.h // Author: Sylvain Gaillard // Created: 19/08/2009 // /* Copyright or © or Copr. CNRS, (August 19, 2009) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _OSEQUENCESTREAM_H_
#define _OSEQUENCESTREAM_H_

#include "IoSequenceStream.h"
#include "../Sequence.h"
#include "../Alphabet/Alphabet.h"
#include

namespace bpp
{

/**
 * @brief The OSequenceStream interface.
 *
 * Interface for writing sequences to an output stream, one sequence per call.
 *
 * @author Sylvain Gaillard
 */
class OSequenceStream: public virtual IOSequenceStream
{
  public:
    OSequenceStream() {}
    virtual ~OSequenceStream() {}

  public:
    /**
     * @brief Write a sequence to a stream.
     *
     * Write one sequence to a stream.
     *
     * @param output The stream to write to.
     * @param seq The sequence to write.
     * @throw Exception IOException.
     */
    virtual void writeSequence(std::ostream& output, const Sequence& seq) const throw (Exception) = 0;
};

} //end of namespace bpp.

#endif // _OSEQUENCESTREAM_H_
bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp000644 000000 000000 00000012030 12147656566 023015 0ustar00rootroot000000 000000
//
// File: BppOSequenceReaderFormat.cpp
// Created by: Julien Dutheil
// Created on: Friday September 14th, 14:08
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)

This software is a computer program whose purpose is to provide classes for phylogenetic data analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/ #include "BppOSequenceReaderFormat.h" #include "Mase.h" #include "Phylip.h" #include "Fasta.h" #include "Clustal.h" #include "Dcse.h" #include "GenBank.h" #include "NexusIoSequence.h" #include #include #include using namespace bpp; using namespace std; ISequence* BppOSequenceReaderFormat::read(const std::string& description) throw (Exception) { unparsedArguments_.clear(); string format = ""; KeyvalTools::parseProcedure(description, format, unparsedArguments_); auto_ptr iSeq; if (format == "Mase") { iSeq.reset(new Mase()); } else if (format == "Phylip") { bool sequential = true, extended = true; string split = " "; if (unparsedArguments_.find("order") != unparsedArguments_.end()) { if (unparsedArguments_["order"] == "sequential") sequential = true; else if (unparsedArguments_["order"] == "interleaved") sequential = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["order"] + "' for argument 'Phylip#order' is unknown. " + "Default used instead: sequential."); } else ApplicationTools::displayWarning("Argument 'Phylip#order' not found. Default used instead: sequential."); if (unparsedArguments_.find("type") != unparsedArguments_.end()) { if (unparsedArguments_["type"] == "extended") { extended = true; split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false); if (split == "spaces") split = " "; else if (split == "tab") split = "\t"; else throw Exception("Unknown option for Phylip#split: " + split); } else if (unparsedArguments_["type"] == "classic") extended = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " + "Default used instead: extended."); } else ApplicationTools::displayWarning("Argument 'Phylip#type' not found. 
Default used instead: extended."); iSeq.reset(new Phylip(extended, sequential, 100, true, split)); } else if (format == "Fasta") { bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, false); bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, false); iSeq.reset(new Fasta(100, true, extended, strictNames)); } else if (format == "Clustal") { unsigned int extraSpaces = ApplicationTools::getParameter("extraSpaces", unparsedArguments_, 0, "", true, false); iSeq.reset(new Clustal(true, extraSpaces)); } else if (format == "Dcse") { iSeq.reset(new DCSE()); } else if (format == "GenBank") { iSeq.reset(reinterpret_cast(new GenBank())); // This is required to remove a strict-aliasing warning in gcc 4.4 } else if (format == "Nexus") { iSeq.reset(new NexusIOSequence()); } else { throw Exception("Sequence format '" + format + "' unknown."); } return iSeq.release(); } bpp-seq-2.1.0/src/Bpp/Seq/Io/NexusIoSequence.cpp000644 000000 000000 00000017020 12147656566 021257 0ustar00rootroot000000 000000 // // File: NexusIOSequence.cpp // Created by: Julien Dutheil // Created on: Wed May 27 16:15 2009 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "NexusIoSequence.h" #include "NexusTools.h" #include "../Container/SiteContainerTools.h" #include "../Alphabet/AlphabetTools.h" #include #include #include using namespace bpp; // From the STL: #include using namespace std; /******************************************************************************/ const std::vector NexusIOSequence::splitNameAndSequence_(const std::string& s) const throw (Exception) { vector v(2); string::size_type index = s.find(" "); if(index == string::npos) throw Exception("NexusIOSequence::splitNameAndSequence_(). No sequence name found."); v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, index)); v[1] = TextTools::removeFirstWhiteSpaces(s.substr(index + 1)); return v; } /******************************************************************************/ void NexusIOSequence::appendAlignmentFromStream(std::istream& input, SiteContainer& vsc) const throw (Exception) { // Checking the existence of specified file if (!input) { throw IOException ("NexusIOSequence::read(). 
Fail to open file"); } //Look for the DATA block: string line = ""; while (TextTools::toUpper(line) != "BEGIN DATA;") { if (input.eof()) throw Exception("NexusIOSequence::appendFromStream(). No data block was found."); line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input)); } //Look for the DIMENSIONS command: string cmdName = "", cmdArgs = ""; while (cmdName != "DIMENSIONS") { if (input.eof()) throw Exception("NexusIOSequence::appendFromStream(). No DIMENSIONS command was found."); NexusTools::getNextCommand(input, cmdName, cmdArgs); cmdName = TextTools::toUpper(cmdName); } map args; KeyvalTools::multipleKeyvals(cmdArgs, args, " "); map argsUp; for (map::iterator it = args.begin(); it != args.end(); it++) argsUp[TextTools::toUpper(it->first)] = it->second; if (argsUp["NTAX"] == "") throw Exception("NexusIOSequence::appendFromStream(). DIMENSIONS command does not have a NTAX argument."); unsigned int ntax = TextTools::to(argsUp["NTAX"]); //Look for the FORMAT command: while (cmdName != "FORMAT") { if (input.eof()) throw Exception("NexusIOSequence::appendFromStream(). No FORMAT command was found."); NexusTools::getNextCommand(input, cmdName, cmdArgs); cmdName = TextTools::toUpper(cmdName); } if (TextTools::hasSubstring(cmdArgs, "TRANSPOSE")) throw Exception("NexusIOSequence::appendFromStream(). TRANSPOSE option is not supported."); //Check if the alignment is dotted or not: bool matchChar = TextTools::hasSubstring(TextTools::toUpper(cmdArgs), "MATCHCHAR"); SiteContainer* alignment = 0; if (matchChar) alignment = new AlignedSequenceContainer(&AlphabetTools::DEFAULT_ALPHABET); else alignment = &vsc; //Look for the MATRIX command: line = ""; while (!TextTools::startsWith(TextTools::toUpper(line), "MATRIX")) { if (input.eof()) throw Exception("NexusIOSequence::appendFromStream(). 
No MATRIX command was found."); line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(input)); } line = FileTools::getNextLine(input); vector names, seqs; // Read first block: bool commandFinished = false; for (unsigned int i = 0; i < ntax && !input.eof(); i++) { if (TextTools::endsWith(line, ";")) { if (i < ntax - 1) throw IOException("NexusIOSequence::appendFromStream. Early end of MATRIX command, some sequences are missing."); else { commandFinished = true; line = line.substr(0, line.size() - 1); //Remove trailing semi-colon. } } vector v = splitNameAndSequence_(line); names.push_back(v[0]); seqs.push_back(v[1]); line = FileTools::getNextLine(input); } //Then read all other blocks: commandFinished = TextTools::removeSurroundingWhiteSpaces(line) == ";"; //In case the end of command is on a separate line. while (!commandFinished) { for (unsigned int i = 0; i < ntax && !input.eof(); i++) { if (TextTools::endsWith(line, ";")) { if (i < ntax - 1) throw IOException("NexusIOSequence::appendFromStream. Early end of MATRIX command, some sequences are missing."); else { commandFinished = true; line = line.substr(0, line.size() - 1); //Remove trailing semi-colon. } } vector v = splitNameAndSequence_(line); if (v[0] != names[i]) throw IOException("NexusIOSequence::appendFromStream. Bad file, the sequences are not in the same order in interleaved blocks, or one taxon is missing."); seqs[i] += v[1]; line = FileTools::getNextLine(input); commandFinished = TextTools::removeSurroundingWhiteSpaces(line) == ";"; //In case the end of command is on a separate line. 
} } for (unsigned int i = 0; i < names.size(); i++) { alignment->addSequence(BasicSequence(names[i], seqs[i], vsc.getAlphabet()), checkNames_); } if (matchChar) { //Now we resolve the alignment: SiteContainer* resolvedAlignment = SiteContainerTools::resolveDottedAlignment(*alignment, vsc.getAlphabet()); delete alignment; for (unsigned int i = 0; i < resolvedAlignment->getNumberOfSequences(); i++) { vsc.addSequence(resolvedAlignment->getSequence(i), false); } delete resolvedAlignment; } } /******************************************************************************/ const std::string NexusIOSequence::getFormatName() const { return "Nexus"; } /******************************************************************************/ const std::string NexusIOSequence::getFormatDescription() const { return "Nexus file format."; } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h000644 000000 000000 00000006412 12147656566 022711 0ustar00rootroot000000 000000 // // File: BppOAlignmentWriterFormat.h // Created by: Julien Dutheil // Created on: Saturday September 15th, 22:10 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPPOALIGNMENTWRITERFORMAT_H_ #define _BPPOALIGNMENTWRITERFORMAT_H_ #include "IoSequenceFactory.h" namespace bpp { /** * @brief Sequence I/O in BppO format. * * Creates a new OAlignment object according to * distribution description syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). * */ class BppOAlignmentWriterFormat: public virtual IOFormat { private: bool verbose_; std::map unparsedArguments_; public: BppOAlignmentWriterFormat(bool verbose = true): verbose_(verbose), unparsedArguments_() {} virtual ~BppOAlignmentWriterFormat() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "Alignment writer"; } /** * @brief Read a OAlignment object from a string. * * @param description A string describing the reader in the keyval syntax. * @return A new OAlignment object according to options specified. * @throw Exception if an error occured. 
*/ OAlignment* read(const std::string& description) throw (Exception); /** * @return The arguments and their unparsed values from the last call of the read function, if there are any. */ virtual const std::map& getUnparsedArguments() const { return unparsedArguments_; } }; } //end of namespace bpp. #endif //_BPPOALIGNMENTWRITERFORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/BppOSequenceReaderFormat.h000644 000000 000000 00000006373 12147656566 022477 0ustar00rootroot000000 000000 // // File: BppOSequenceReaderFormat.h // Created by: Julien Dutheil // Created on: Friday September 14th, 14:08 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPPOSEQUENCEREADERFORMAT_H_ #define _BPPOSEQUENCEREADERFORMAT_H_ #include "IoSequenceFactory.h" namespace bpp { /** * @brief Sequence I/O in BppO format. * * Creates a new ISequence object according to * distribution description syntax (see the Bio++ Program Suite * manual for a detailed description of this syntax). * */ class BppOSequenceReaderFormat: public virtual IOFormat { private: bool verbose_; std::map unparsedArguments_; public: BppOSequenceReaderFormat(bool verbose = true): verbose_(verbose), unparsedArguments_() {} virtual ~BppOSequenceReaderFormat() {} public: const std::string getFormatName() const { return "BppO"; } const std::string getFormatDescription() const { return "Bpp Options format."; } const std::string getDataType() const { return "Sequence reader"; } /** * @brief Read a ISequence object from a string. * * @param description A string describing the reader in the keyval syntax. * @return A new ISequence object according to options specified. * @throw Exception if an error occured. */ ISequence* read(const std::string& description) throw (Exception); /** * @return The arguments and their unparsed values from the last call of the read function, if there are any. */ virtual const std::map& getUnparsedArguments() const { return unparsedArguments_; } }; } //end of namespace bpp. #endif //_BPPOSEQUENCEREADERFORMAT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/AbstractOAlignment.h000644 000000 000000 00000005153 12147656566 021366 0ustar00rootroot000000 000000 // // File: AbstractOAlignment.h // Created by: Julien Dutheil // Created on: ? 
// /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ABSTRACTOALIGNMENT_H_ #define _ABSTRACTOALIGNMENT_H_ #include "OSequence.h" #include "../Alphabet/Alphabet.h" #include "../Container/VectorSequenceContainer.h" // From the STL: #include #include namespace bpp { /** * @brief Partial implementation of the OAlignment interface. 
*/ class AbstractOAlignment: public virtual OAlignment { public: AbstractOAlignment() {} virtual ~AbstractOAlignment() {} public: /** * @name OAlignment methods: * * @{ */ void writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) = 0; void writeAlignment(const std::string& path, const SiteContainer& sc, bool overwrite = true) const throw (Exception) { // Open file in specified mode std::ofstream output(path.c_str(), overwrite ? (std::ios::out) : (std::ios::out | std::ios::app)); writeAlignment(output, sc); output.close(); } /** @} */ }; } //end of namespace bpp. #endif //_ABSTRACTOALIGNMENT_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/IoSequenceFactory.h000644 000000 000000 00000010414 12147656566 021231 0ustar00rootroot000000 000000 // // File IoSequenceFactory.h // Created by: Julien Dutheil // Created on: Tue 18/04/06 10:24 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _IOSEQUENCEFACTORY_H_
#define _IOSEQUENCEFACTORY_H_

#include "ISequence.h"
#include "OSequence.h"
#include "../Alphabet/Alphabet.h"

namespace bpp
{

/**
 * @brief Utility class for creating sequence readers and writers.
 *
 * The *_FORMAT constants below are the format names to pass to the create*()
 * methods (see the example on the constructor).
 */
class IoSequenceFactory
{
public:
  static const std::string FASTA_FORMAT;
  static const std::string MASE_FORMAT;
  static const std::string CLUSTAL_FORMAT;
  static const std::string DCSE_FORMAT;
  static const std::string PHYLIP_FORMAT_INTERLEAVED;
  static const std::string PHYLIP_FORMAT_SEQUENTIAL;
  static const std::string PAML_FORMAT_INTERLEAVED;
  static const std::string PAML_FORMAT_SEQUENTIAL;
  static const std::string GENBANK_FORMAT;
  static const std::string NEXUS_FORMAT;

public:

  /**
   * @brief Creates a new factory object.
   *
   * Example:
   * @code
   * Alphabet* alphabet = new DNA();
   * ISequence* seqReader = IoSequenceFactory().createReader(IoSequenceFactory::FASTA_FORMAT);
   * SequenceContainer* sequences = seqReader->read("file.fasta", alphabet);
   * delete seqReader;
   * @endcode
   */
  IoSequenceFactory() {}
  virtual ~IoSequenceFactory() {}

  /**
   * @brief Get a new dynamically created ISequence object.
   *
   * @param format The input file format.
   * @return A pointer toward a new ISequence object.
   * @throw Exception If the format name does not match any available format.
   */
  virtual ISequence* createReader(const std::string& format) throw (Exception);

  /**
   * @brief Get a new dynamically created IAlignment object.
   *
   * @param format The input file format.
   * @return A pointer toward a new IAlignment object.
   * @throw Exception If the format name does not match any available format.
   */
  virtual IAlignment* createAlignmentReader(const std::string& format) throw (Exception);

  /**
   * @brief Get a new dynamically created OSequence object.
   *
   * @param format The output file format.
   * @return A pointer toward a new OSequence object.
   * @throw Exception If the format name does not match any available format.
   */
  virtual OSequence* createWriter(const std::string& format) throw (Exception);

  /**
   * @brief Get a new dynamically created OAlignment object.
   *
   * @param format The output file format.
   * @return A pointer toward a new OAlignment object.
   * @throw Exception If the format name does not match any available format.
   */
  virtual OAlignment* createAlignmentWriter(const std::string& format) throw (Exception);
};

} //end of namespace bpp.

#endif //_IOSEQUENCEFACTORY_H_
bpp-seq-2.1.0/src/Bpp/Seq/Io/StreamSequenceIterator.h000644 000000 000000 00000010116 12147656566 022276 0ustar00rootroot000000 000000
//
// File: StreamSequenceIterator.h
// Created by: Julien Dutheil
// Created on: Tue Feb 26 14:27 2013
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for sequences analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _STREAMSEQUENCEITERATOR_H_ #define _STREAMSEQUENCEITERATOR_H_ #include "../SequenceIterator.h" #include "ISequenceStream.h" // From the STL: #include namespace bpp { /** * @brief A sequence iterator based on a sequence stream. * * This iterator uses a bpp::BasicSequence object for storing sequences. 
 */
class StreamSequenceIterator:
  public virtual SequenceIterator
{
  private:
    const Alphabet* alphabet_;
    const ISequenceStream* seqStream_;
    std::istream* stream_;
    // Null when there is no more sequence to return (see hasMoreSequences()).
    // NOTE(review): the destructor below does not delete this pointer; check in the
    // implementation file who owns a sequence that was never handed out.
    BasicSequence* nextSeq_;

  public:
    /**
     * @param seqStream The sequence stream parser to use.
     * @param stream    The input stream to read sequences from.
     * @param alphabet  The alphabet of the sequences.
     */
    StreamSequenceIterator(ISequenceStream& seqStream, std::istream& stream, const Alphabet* alphabet);
    virtual ~StreamSequenceIterator() {}

  private:
    //Copy is forbidden: both operations are private. They share the stream
    //pointers but never the pending sequence (nextSeq_ is reset to 0).
    StreamSequenceIterator(const StreamSequenceIterator& ssi):
      alphabet_(ssi.alphabet_), seqStream_(ssi.seqStream_), stream_(ssi.stream_), nextSeq_(0) {}

    StreamSequenceIterator& operator=(const StreamSequenceIterator& ssi)
    {
      alphabet_ = ssi.alphabet_;
      seqStream_ = ssi.seqStream_;
      stream_ = ssi.stream_;
      nextSeq_ = 0;
      return *this;
    }

  public:
    virtual Sequence* nextSequence();

    /** @return True if a pending sequence is available (nextSeq_ is not null). */
    virtual bool hasMoreSequences() const { return nextSeq_ != 0; }
};

/**
 * @brief A sequence iterator based on a sequence stream.
 *
 * This iterator uses a bpp::SequenceWithQuality object for storing sequences.
 */
class StreamSequenceWithQualityIterator:
  public virtual SequenceWithQualityIterator
{
  private:
    const Alphabet* alphabet_;
    const ISequenceStream* seqStream_;
    std::istream* stream_;
    // Null when there is no more sequence to return (see hasMoreSequences()).
    // NOTE(review): the destructor below does not delete this pointer; check in the
    // implementation file who owns a sequence that was never handed out.
    SequenceWithQuality* nextSeq_;

  public:
    /**
     * @param seqStream The sequence stream parser to use.
     * @param stream    The input stream to read sequences from.
     * @param alphabet  The alphabet of the sequences.
     */
    StreamSequenceWithQualityIterator(ISequenceStream& seqStream, std::istream& stream, const Alphabet* alphabet);
    virtual ~StreamSequenceWithQualityIterator() {}

  private:
    //Copy is forbidden: both operations are private. They share the stream
    //pointers but never the pending sequence (nextSeq_ is reset to 0).
    StreamSequenceWithQualityIterator(const StreamSequenceWithQualityIterator& ssi):
      alphabet_(ssi.alphabet_), seqStream_(ssi.seqStream_), stream_(ssi.stream_), nextSeq_(0) {}

    StreamSequenceWithQualityIterator& operator=(const StreamSequenceWithQualityIterator& ssi)
    {
      alphabet_ = ssi.alphabet_;
      seqStream_ = ssi.seqStream_;
      stream_ = ssi.stream_;
      nextSeq_ = 0;
      return *this;
    }

  public:
    virtual SequenceWithQuality* nextSequence();

    /** @return True if a pending sequence is available (nextSeq_ is not null). */
    virtual bool hasMoreSequences() const { return nextSeq_ != 0; }
};

} //end of namespace bpp.
#endif //_SEQUENCEITERATOR_H_ bpp-seq-2.1.0/src/Bpp/Seq/Io/NexusIoSequence.h000644 000000 000000 00000011273 12147656566 020730 0ustar00rootroot000000 000000 // // File: NexusIOSequence.h // Created by: Julien Dutheil // Created on: Wed May 27 16:15 2009 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/

#ifndef _NEXUSIOSEQUENCE_H_
#define _NEXUSIOSEQUENCE_H_

#include "AbstractIAlignment.h"
#include "../Sequence.h"
#include "../Container/SequenceContainer.h"
#include "../Container/VectorSequenceContainer.h"
#include "../Container/AlignedSequenceContainer.h"

// From the STL:
#include

namespace bpp
{

/**
 * @brief The Nexus format reader for sequences.
 *
 * An AlignedSequenceContainer is used instead of a VectorSequenceContainer.
 *
 * This reader is not supposed to be a full parser of the Nexus files,
 * but only extracts the sequence data. Only a basic subset of the options
 * is and will be supported.
 *
 * This format is described in the following paper:
 * Maddison D, Swofford D, and Maddison W (1997), _Syst Biol_ 46(4):590-621
 *
 * @author Julien Dutheil
 */
class NexusIOSequence:
  public AbstractIAlignment,
  public virtual ISequence
{
  protected:

    /**
     * @brief The maximum number of chars to be written on a line.
     */
    unsigned int charsByLine_;

    /**
     * @brief Whether sequence names are checked when reading from files
     * (read and set through the checkNames() accessors).
     */
    bool checkNames_;

  public:
    /**
     * @brief Build a new Nexus file reader.
     *
     * @param charsByLine The number of bases to display in a row (ignored for now, no writing support).
     * @param checkSequenceNames Tell if the names in the file should be checked for uniqueness (slower, in O(n*n) where n is the number of sequences).
     */
    NexusIOSequence(unsigned int charsByLine = 100, bool checkSequenceNames = true): charsByLine_(charsByLine), checkNames_(checkSequenceNames) {}

    virtual ~NexusIOSequence() {}

  public:

    /**
     * @name The AbstractIAlignment interface.
     *
     * @{
     */
    void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception);
    /** @} */

    /**
     * @name The ISequence interface.
     *
     * As a SiteContainer is a subclass of SequenceContainer, we hereby implement the ISequence
     * interface by forwarding to readAlignment().
     *
     * @{
     */
    virtual SequenceContainer* readSequences(std::istream& input, const Alphabet* alpha) const throw (Exception) {
      return readAlignment(input, alpha);
    }
    virtual SequenceContainer* readSequences(const std::string& path, const Alphabet* alpha) const throw (Exception) {
      return readAlignment(path, alpha);
    }
    /** @} */

    /**
     * @name The IOSequence interface.
     *
     * @{
     */
    const std::string getFormatName() const;
    const std::string getFormatDescription() const;
    /** @} */

    /**
     * @return true if the names are to be checked when reading sequences from files.
     */
    bool checkNames() const { return checkNames_; }

    /**
     * @brief Tell whether the sequence names should be checked when reading from files.
     *
     * @param yn whether the sequence names should be checked when reading from files.
     */
    void checkNames(bool yn) { checkNames_ = yn; }

  private:
    //Reading tools:
    const std::vector splitNameAndSequence_(const std::string & s) const throw (Exception);
};

} //end of namespace bpp.

#endif //_NEXUSIOSEQUENCE_H_
bpp-seq-2.1.0/src/Bpp/Seq/Io/StreamSequenceIterator.cpp000644 000000 000000 00000006253 12147656566 022640 0ustar00rootroot000000 000000
//
// File: StreamSequenceIterator.cpp
// Created by: Julien Dutheil
// Created on: Tue Feb 26 14:27 2013
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for sequences analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "StreamSequenceIterator.h" using namespace bpp; StreamSequenceIterator::StreamSequenceIterator(ISequenceStream& seqStream, std::istream& stream, const Alphabet* alphabet): alphabet_(alphabet), seqStream_(&seqStream), stream_(&stream), nextSeq_(new BasicSequence(alphabet_)) { bool test = seqStream_->nextSequence(*stream_, *nextSeq_); if (!test) { delete nextSeq_; nextSeq_ = 0; //No more sequence available } } Sequence* StreamSequenceIterator::nextSequence() { BasicSequence* seq = nextSeq_; if (nextSeq_) { nextSeq_ = new BasicSequence(alphabet_); bool test = seqStream_->nextSequence(*stream_, *nextSeq_); if (!test) { delete nextSeq_; nextSeq_ = 0; //No more sequence available } } return seq; } StreamSequenceWithQualityIterator::StreamSequenceWithQualityIterator(ISequenceStream& seqStream, std::istream& stream, const Alphabet* alphabet): alphabet_(alphabet), seqStream_(&seqStream), stream_(&stream), nextSeq_(new SequenceWithQuality(alphabet_)) { bool test = seqStream_->nextSequence(*stream_, *nextSeq_); if (!test) { delete nextSeq_; nextSeq_ = 0; //No more sequence available } } SequenceWithQuality* StreamSequenceWithQualityIterator::nextSequence() { SequenceWithQuality* seq 
= nextSeq_; if (nextSeq_) { nextSeq_ = new SequenceWithQuality(alphabet_); bool test = seqStream_->nextSequence(*stream_, *nextSeq_); if (!test) { delete nextSeq_; nextSeq_ = 0; //No more sequence available } } return seq; } bpp-seq-2.1.0/src/Bpp/Seq/SequenceTools.cpp000644 000000 000000 00000052142 12147656566 020422 0ustar00rootroot000000 000000 // // File: SequenceTools.cpp // Authors: Guillaume Deuchst // Julien Dutheil // Sylvain Gaillard // Created on: Tue Aug 21 2003 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SequenceTools.h" #include "Alphabet/AlphabetTools.h" #include "StringSequenceTools.h" #include #include using namespace bpp; // From the STL: #include #include #include #include using namespace std; DNA SequenceTools::_DNA; RNA SequenceTools::_RNA; RNY SequenceTools::_RNY(_DNA); NucleicAcidsReplication SequenceTools::_DNARep(&_DNA, &_DNA); NucleicAcidsReplication SequenceTools::_RNARep(&_RNA, &_RNA); NucleicAcidsReplication SequenceTools::_transc(&_DNA, &_RNA); /******************************************************************************/ Sequence* SequenceTools::subseq(const Sequence& sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception) { // Checking interval if (end >= sequence.size()) throw IndexOutOfBoundsException ("SequenceTools::subseq : Invalid upper bound", end, 0, sequence.size()); if (end < begin) throw Exception ("SequenceTools::subseq : Invalid interval"); // Copy sequence vector temp(sequence.getContent()); // Truncate sequence temp.erase(temp.begin() + end + 1, temp.end()); temp.erase(temp.begin(), temp.begin() + begin); // New sequence creation return new BasicSequence(sequence.getName(), temp, sequence.getComments(), sequence.getAlphabet()); } /******************************************************************************/ Sequence* SequenceTools::concatenate(const Sequence& seq1, const Sequence& seq2) throw (AlphabetMismatchException, Exception) { // Sequence's alphabets matching verification if ((seq1.getAlphabet()->getAlphabetType()) != (seq2.getAlphabet()->getAlphabetType())) throw AlphabetMismatchException("SequenceTools::concatenate : Sequence's alphabets don't match ", seq1.getAlphabet(), seq2.getAlphabet()); // Sequence's names matching verification if (seq1.getName() != seq2.getName()) throw Exception ("SequenceTools::concatenate : Sequence's names don't match"); // 
Concatenate sequences and send result vector sequence = seq1.getContent(); vector sequence2 = seq2.getContent(); sequence.insert(sequence.end(), sequence2.begin(), sequence2.end()); return new BasicSequence(seq1.getName(), sequence, seq1.getComments(), seq1.getAlphabet()); } /******************************************************************************/ Sequence& SequenceTools::complement(Sequence& seq) throw (AlphabetException) { // Alphabet type checking NucleicAcidsReplication* NAR; if (seq.getAlphabet()->getAlphabetType() == "DNA alphabet") { NAR = &_DNARep; } else if (seq.getAlphabet()->getAlphabetType() == "RNA alphabet") { NAR = &_RNARep; } else { throw AlphabetException("SequenceTools::complement: Sequence must be nucleic.", seq.getAlphabet()); } for (size_t i = 0; i < seq.size(); i++) { seq.setElement(i, NAR->translate(seq.getValue(i))); } return seq; } /******************************************************************************/ Sequence* SequenceTools::getComplement(const Sequence& sequence) throw (AlphabetException) { // Alphabet type checking NucleicAcidsReplication* NAR; if (sequence.getAlphabet()->getAlphabetType() == "DNA alphabet") { NAR = &_DNARep; } else if (sequence.getAlphabet()->getAlphabetType() == "RNA alphabet") { NAR = &_RNARep; } else { throw AlphabetException ("SequenceTools::getComplement: Sequence must be nucleic.", sequence.getAlphabet()); } return NAR->translate(sequence); } /******************************************************************************/ Sequence* SequenceTools::transcript(const Sequence& sequence) throw (AlphabetException) { // Alphabet type checking if (sequence.getAlphabet()->getAlphabetType() != "DNA alphabet") { throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet()); } return _transc.translate(sequence); } /******************************************************************************/ Sequence* SequenceTools::reverseTranscript(const Sequence& sequence) throw 
(AlphabetException) { // Alphabet type checking if (sequence.getAlphabet()->getAlphabetType() != "RNA alphabet") { throw AlphabetException ("SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet()); } return _transc.reverse(sequence); } /******************************************************************************/ Sequence& SequenceTools::invert(Sequence& seq) { size_t seq_size = seq.size(); // store seq size for efficiency unsigned int tmp_state = 0; // to store one state when swapping positions size_t j = seq_size; // symetric position iterator from sequence end for (size_t i = 0; i < seq_size / 2; i++) { j = seq_size - 1 - i; tmp_state = seq.getValue(i); seq.setElement(i, seq.getValue(j)); seq.setElement(j, tmp_state); } return seq; } /******************************************************************************/ Sequence* SequenceTools::getInvert(const Sequence& sequence) { Sequence* iSeq = sequence.clone(); invert(*iSeq); return iSeq; } /******************************************************************************/ Sequence& SequenceTools::invertComplement(Sequence& seq) { // Alphabet type checking NucleicAcidsReplication* NAR; if (seq.getAlphabet()->getAlphabetType() == "DNA alphabet") { NAR = &_DNARep; } else if (seq.getAlphabet()->getAlphabetType() == "RNA alphabet") { NAR = &_RNARep; } else { throw AlphabetException("SequenceTools::complement: Sequence must be nucleic.", seq.getAlphabet()); } // for (size_t i = 0 ; i < seq.size() ; i++) { // seq.setElement(i, NAR->translate(seq.getValue(i))); // } size_t seq_size = seq.size(); // store seq size for efficiency int tmp_state = 0; // to store one state when swapping positions size_t j = seq_size; // symetric position iterator from sequence end for (size_t i = 0; i < seq_size / 2; i++) { j = seq_size - 1 - i; tmp_state = seq.getValue(i); seq.setElement(i, NAR->translate(seq.getValue(j))); seq.setElement(j, NAR->translate(tmp_state)); } if (seq_size % 2) // treate the state in the 
middle of odd sequences { seq.setElement(seq_size / 2, NAR->translate(seq.getValue(seq_size / 2))); } return seq; } /******************************************************************************/ double SequenceTools::getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps) throw (AlphabetMismatchException, SequenceNotAlignedException) { if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SequenceTools::getPercentIdentity", seq1.getAlphabet(), seq2.getAlphabet()); if (seq1.size() != seq2.size()) throw SequenceNotAlignedException("SequenceTools::getPercentIdentity", &seq2); int gap = seq1.getAlphabet()->getGapCharacterCode(); size_t id = 0; size_t tot = 0; for (size_t i = 0; i < seq1.size(); i++) { int x = seq1.getValue(i); int y = seq2.getValue(i); if (ignoreGaps) { if (x != gap && y != gap) { tot++; if (x == y) id++; } } else { tot++; if (x == y) id++; } } return static_cast(id) / static_cast(tot) * 100.; } /******************************************************************************/ size_t SequenceTools::getNumberOfSites(const Sequence& seq) { size_t count = 0; const Alphabet* alpha = seq.getAlphabet(); for (size_t i = 0; i < seq.size(); i++) { if (!alpha->isGap(seq[i])) count++; } return count; } /******************************************************************************/ size_t SequenceTools::getNumberOfCompleteSites(const Sequence& seq) { size_t count = 0; const Alphabet* alpha = seq.getAlphabet(); for (size_t i = 0; i < seq.size(); i++) { if (!alpha->isGap(seq[i]) && !alpha->isUnresolved(seq[i])) count++; } return count; } /******************************************************************************/ size_t SequenceTools::getNumberOfUnresolvedSites(const Sequence& seq) { size_t count = 0; const Alphabet* alpha = seq.getAlphabet(); for (size_t i = 0; i < seq.size(); i++) { if (alpha->isUnresolved(seq[i])) count++; } return count; } 
/******************************************************************************/ Sequence* SequenceTools::getSequenceWithoutGaps(const Sequence& seq) { const Alphabet* alpha = seq.getAlphabet(); vector content; for (size_t i = 0; i < seq.size(); i++) { if (!alpha->isGap(seq[i])) content.push_back(seq[i]); } Sequence* newSeq = dynamic_cast(seq.clone()); newSeq->setContent(content); return newSeq; } /******************************************************************************/ void SequenceTools::removeGaps(Sequence& seq) { const Alphabet* alpha = seq.getAlphabet(); for (size_t i = seq.size(); i > 0; --i) { if (alpha->isGap(seq[i - 1])) seq.deleteElement(i - 1); } } /******************************************************************************/ Sequence* SequenceTools::getSequenceWithoutStops(const Sequence& seq) throw (Exception) { const CodonAlphabet* calpha = dynamic_cast(seq.getAlphabet()); if (!calpha) throw Exception("SequenceTools::getSequenceWithoutStops. Input sequence should have a codon alphabet."); vector content; for (size_t i = 0; i < seq.size(); i++) { if (!calpha->isStop(seq[i])) content.push_back(seq[i]); } Sequence* newSeq = dynamic_cast(seq.clone()); newSeq->setContent(content); return newSeq; } /******************************************************************************/ void SequenceTools::removeStops(Sequence& seq) throw (Exception) { const CodonAlphabet* calpha = dynamic_cast(seq.getAlphabet()); if (!calpha) throw Exception("SequenceTools::removeStops. Input sequence should have a codon alphabet."); for (size_t i = seq.size(); i > 0; --i) { if (calpha->isStop(seq[i - 1])) seq.deleteElement(i - 1); } } /******************************************************************************/ void SequenceTools::replaceStopsWithGaps(Sequence& seq) throw (Exception) { const CodonAlphabet* calpha = dynamic_cast(seq.getAlphabet()); if (!calpha) throw Exception("SequenceTools::replaceStopsWithGaps. 
Input sequence should have a codon alphabet."); int gap = calpha->getGapCharacterCode(); for (size_t i = 0; i < seq.size(); ++i) { if (calpha->isStop(seq[i])) seq.setElement(i, gap); } } /******************************************************************************/ BowkerTest* SequenceTools::bowkerTest(const Sequence& seq1, const Sequence& seq2) throw (SequenceNotAlignedException) { if (seq1.size() != seq2.size()) throw SequenceNotAlignedException("SequenceTools::bowkerTest.", &seq2); size_t n = seq1.size(); const Alphabet* alpha = seq1.getAlphabet(); unsigned int r = alpha->getSize(); // Compute contingency table: RowMatrix array(r, r); int x, y; for (size_t i = 0; i < n; i++) { x = seq1[i]; y = seq2[i]; if (!alpha->isGap(x) && !alpha->isUnresolved(x) && !alpha->isGap(y) && !alpha->isUnresolved(y)) { array(static_cast(x), static_cast(y))++; } } // Compute Bowker's statistic: double sb2 = 0, nij, nji; for (unsigned int i = 1; i < r; ++i) { for (unsigned int j = 0; j < i; ++j) { nij = array(i, j); nji = array(j, i); if (nij != 0 || nji != 0) { sb2 += pow(nij - nji, 2) / (nij + nji); } // Else: we should display a warning there. } } // Compute p-value: double pvalue = 1. 
- RandomTools::pChisq(sb2, (r - 1) * r / 2); // Return results: BowkerTest* test = new BowkerTest(); test->setStatistic(sb2); test->setPValue(pvalue); return test; } /******************************************************************************/ void SequenceTools::getPutativeHaplotypes(const Sequence& seq, std::vector& hap, unsigned int level) { vector< vector< int > > states(seq.size()); list t_hap; const Alphabet* alpha = seq.getAlphabet(); unsigned int hap_count = 1; // Vector of available states at each position for (size_t i = 0; i < seq.size(); i++) { vector st = alpha->getAlias(seq[i]); if (!st.size()) { st.push_back(alpha->getGapCharacterCode()); } if (st.size() <= level) { states[i] = st; } else { states[i] = vector(1, seq[i]); } } // Combinatorial haplotypes building (the use of tree may be more accurate) t_hap.push_back(new BasicSequence(seq.getName() + "_hap" + TextTools::toString(hap_count++), "", alpha)); for (size_t i = 0; i < states.size(); i++) { for (list::iterator it = t_hap.begin(); it != t_hap.end(); it++) { for (unsigned int j = 0; j < states[i].size(); j++) { Sequence* tmp_seq = new BasicSequence(seq.getName() + "_hap", (**it).getContent(), alpha); if (j < states[i].size() - 1) { tmp_seq->setName(tmp_seq->getName() + TextTools::toString(hap_count++)); tmp_seq->addElement(states[i][j]); t_hap.insert(it, tmp_seq); } else { (**it).addElement(states[i][j]); } } } } for (list::reverse_iterator it = t_hap.rbegin(); it != t_hap.rend(); it++) { hap.push_back(*it); } } /******************************************************************************/ Sequence* SequenceTools::combineSequences(const Sequence& s1, const Sequence& s2) throw (AlphabetMismatchException) { if (s1.getAlphabet()->getAlphabetType() != s2.getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("SequenceTools::combineSequences(const Sequence& s1, const Sequence& s2): s1 and s2 don't have same Alphabet.", s1.getAlphabet(), s2.getAlphabet()); } const Alphabet* alpha = 
s1.getAlphabet(); vector st; vector seq; size_t length = NumTools::max(s1.size(), s2.size()); for (size_t i = 0; i < length; i++) { if (i < s1.size()) st.push_back(s1.getValue(i)); if (i < s2.size()) st.push_back(s2.getValue(i)); seq.push_back(alpha->getGeneric(st)); st.clear(); } Sequence* s = new BasicSequence(s1.getName() + "+" + s2.getName(), seq, alpha); return s; } /******************************************************************************/ Sequence* SequenceTools::subtractHaplotype(const Sequence& s, const Sequence& h, string name, unsigned int level) throw (SequenceNotAlignedException) { const Alphabet* alpha = s.getAlphabet(); if (name.size() == 0) name = s.getName() + "_haplotype"; string seq; if (s.size() != h.size()) throw SequenceNotAlignedException("SequenceTools::subtractHaplotype: haplotype must be aligned with the sequence.", &h); for (unsigned int i = 0; i < s.size(); ++i) { string c; vector nucs = alpha->getAlias(s.getValue(i)); if (nucs.size() > 1) { remove(nucs.begin(), nucs.end(), h.getValue(i)); nucs = vector(nucs.begin(), nucs.end() - 1); } else { nucs = vector(nucs.begin(), nucs.end()); } c = alpha->intToChar(alpha->getGeneric(nucs)); if (level <= nucs.size() && (alpha->isUnresolved(s.getValue(i)) || alpha->isUnresolved(h.getValue(i)))) { c = alpha->intToChar(alpha->getUnknownCharacterCode()); } seq += c; } Sequence* hap = new BasicSequence(name, seq, alpha); return hap; } /******************************************************************************/ Sequence* SequenceTools::RNYslice(const Sequence& seq, int ph) throw (AlphabetException) { // Alphabet type checking if (seq.getAlphabet()->getAlphabetType() != "DNA alphabet") { throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet()); } if ((ph < 1) || (ph > 3)) throw Exception("Bad phase for RNYSlice: " + TextTools::toString(ph) + ". 
Should be between 1 and 3."); size_t s = seq.size(); size_t n = (s - ph + 3) / 3; vector content(n); int tir = seq.getAlphabet()->getGapCharacterCode(); size_t j; for (size_t i = 0; i < n; i++) { j = i * 3 + ph - 1; if (j == 0) content[i] = _RNY.getRNY(tir, seq[0], seq[1], *seq.getAlphabet()); else { if (j == s - 1) content[i] = _RNY.getRNY(seq[j - 1], seq[j], tir, *seq.getAlphabet()); else content[i] = _RNY.getRNY(seq[j - 1], seq[j], seq[j + 1], *seq.getAlphabet()); } } // New sequence creating, and sense reversing Sequence* sq = new BasicSequence(seq.getName(), content, seq.getComments(), &_RNY); // Send result return sq; } Sequence* SequenceTools::RNYslice(const Sequence& seq) throw (AlphabetException) { // Alphabet type checking if (seq.getAlphabet()->getAlphabetType() != "DNA alphabet") { throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet()); } size_t n = seq.size(); vector content(n); int tir = seq.getAlphabet()->getGapCharacterCode(); if (seq.size() >= 2) { content[0] = _RNY.getRNY(tir, seq[0], seq[1], *seq.getAlphabet()); for (unsigned int i = 1; i < n - 1; i++) { content[i] = _RNY.getRNY(seq[i - 1], seq[i], seq[i + 1], *seq.getAlphabet()); } content[n - 1] = _RNY.getRNY(seq[n - 2], seq[n - 1], tir, *seq.getAlphabet()); } // New sequence creating, and sense reversing Sequence* s = new BasicSequence(seq.getName(), content, seq.getComments(), &_RNY); // Send result return s; } /******************************************************************************/ void SequenceTools::getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit, bool includeStop) { const CodonAlphabet* alphabet = dynamic_cast(sequence.getAlphabet()); if (!alphabet) throw AlphabetException("SequenceTools::getCDS. Sequence is not a codon sequence."); if (checkInit) { unsigned int i; for (i = 0; i < sequence.size() && !alphabet->isInit(sequence[i]); ++i) {} for (unsigned int j = 0; includeInit ? 
j < i : j <= i; ++j) { sequence.deleteElement(j); } } if (checkStop) { unsigned int i; for (i = 0; i < sequence.size() && !alphabet->isStop(sequence[i]); ++i) {} for (unsigned int j = includeStop ? i + 1 : i; j < sequence.size(); ++j) { sequence.deleteElement(j); } } } /******************************************************************************/ size_t SequenceTools::findFirstOf(const Sequence& seq, const Sequence& motif, bool strict) { if (motif.size() > seq.size()) return seq.size(); for (size_t seqi = 0; seqi < seq.size() - motif.size() + 1; seqi++) { bool match = false; for (size_t moti = 0; moti < motif.size(); moti++) { if (strict) { match = seq.getValue(seqi + moti) == motif.getValue(moti); } else { match = AlphabetTools::match(seq.getAlphabet(), seq.getValue(seqi + moti), motif.getValue(moti)); } if (!match) { break; } } if (match) { return seqi; } } return seq.size(); } /******************************************************************************/ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/__BLOSUM50MatrixCode000644 000000 000000 00000025526 12147656566 023212 0ustar00rootroot000000 000000 distanceMatrix_(0,0) = 5; distanceMatrix_(0,1) = -2; distanceMatrix_(0,2) = -1; distanceMatrix_(0,3) = -2; distanceMatrix_(0,4) = -1; distanceMatrix_(0,5) = -1; distanceMatrix_(0,6) = -1; distanceMatrix_(0,7) = 0; distanceMatrix_(0,8) = -2; distanceMatrix_(0,9) = -1; distanceMatrix_(0,10) = -2; distanceMatrix_(0,11) = -1; distanceMatrix_(0,12) = -1; distanceMatrix_(0,13) = -3; distanceMatrix_(0,14) = -1; distanceMatrix_(0,15) = 1; distanceMatrix_(0,16) = 0; distanceMatrix_(0,17) = -3; distanceMatrix_(0,18) = -2; distanceMatrix_(0,19) = 0; distanceMatrix_(1,0) = -2; distanceMatrix_(1,1) = 7; distanceMatrix_(1,2) = -1; distanceMatrix_(1,3) = -2; distanceMatrix_(1,4) = -4; distanceMatrix_(1,5) = 1; distanceMatrix_(1,6) = 0; distanceMatrix_(1,7) = -3; distanceMatrix_(1,8) = 0; distanceMatrix_(1,9) = -4; distanceMatrix_(1,10) = -3; distanceMatrix_(1,11) = 3; 
distanceMatrix_(1,12) = -2; distanceMatrix_(1,13) = -3; distanceMatrix_(1,14) = -3; distanceMatrix_(1,15) = -1; distanceMatrix_(1,16) = -1; distanceMatrix_(1,17) = -3; distanceMatrix_(1,18) = -1; distanceMatrix_(1,19) = -3; distanceMatrix_(2,0) = -1; distanceMatrix_(2,1) = -1; distanceMatrix_(2,2) = 7; distanceMatrix_(2,3) = 2; distanceMatrix_(2,4) = -2; distanceMatrix_(2,5) = 0; distanceMatrix_(2,6) = 0; distanceMatrix_(2,7) = 0; distanceMatrix_(2,8) = 1; distanceMatrix_(2,9) = -3; distanceMatrix_(2,10) = -4; distanceMatrix_(2,11) = 0; distanceMatrix_(2,12) = -2; distanceMatrix_(2,13) = -4; distanceMatrix_(2,14) = -2; distanceMatrix_(2,15) = 1; distanceMatrix_(2,16) = 0; distanceMatrix_(2,17) = -4; distanceMatrix_(2,18) = -2; distanceMatrix_(2,19) = -3; distanceMatrix_(3,0) = -2; distanceMatrix_(3,1) = -2; distanceMatrix_(3,2) = 2; distanceMatrix_(3,3) = 8; distanceMatrix_(3,4) = -4; distanceMatrix_(3,5) = 0; distanceMatrix_(3,6) = 2; distanceMatrix_(3,7) = -1; distanceMatrix_(3,8) = -1; distanceMatrix_(3,9) = -4; distanceMatrix_(3,10) = -4; distanceMatrix_(3,11) = -1; distanceMatrix_(3,12) = -4; distanceMatrix_(3,13) = -5; distanceMatrix_(3,14) = -1; distanceMatrix_(3,15) = 0; distanceMatrix_(3,16) = -1; distanceMatrix_(3,17) = -5; distanceMatrix_(3,18) = -3; distanceMatrix_(3,19) = -4; distanceMatrix_(4,0) = -1; distanceMatrix_(4,1) = -4; distanceMatrix_(4,2) = -2; distanceMatrix_(4,3) = -4; distanceMatrix_(4,4) = 13; distanceMatrix_(4,5) = -3; distanceMatrix_(4,6) = -3; distanceMatrix_(4,7) = -3; distanceMatrix_(4,8) = -3; distanceMatrix_(4,9) = -2; distanceMatrix_(4,10) = -2; distanceMatrix_(4,11) = -3; distanceMatrix_(4,12) = -2; distanceMatrix_(4,13) = -2; distanceMatrix_(4,14) = -4; distanceMatrix_(4,15) = -1; distanceMatrix_(4,16) = -1; distanceMatrix_(4,17) = -5; distanceMatrix_(4,18) = -3; distanceMatrix_(4,19) = -1; distanceMatrix_(5,0) = -1; distanceMatrix_(5,1) = 1; distanceMatrix_(5,2) = 0; distanceMatrix_(5,3) = 0; distanceMatrix_(5,4) = -3; 
distanceMatrix_(5,5) = 7; distanceMatrix_(5,6) = 2; distanceMatrix_(5,7) = -2; distanceMatrix_(5,8) = 1; distanceMatrix_(5,9) = -3; distanceMatrix_(5,10) = -2; distanceMatrix_(5,11) = 2; distanceMatrix_(5,12) = 0; distanceMatrix_(5,13) = -4; distanceMatrix_(5,14) = -1; distanceMatrix_(5,15) = 0; distanceMatrix_(5,16) = -1; distanceMatrix_(5,17) = -1; distanceMatrix_(5,18) = -1; distanceMatrix_(5,19) = -3; distanceMatrix_(6,0) = -1; distanceMatrix_(6,1) = 0; distanceMatrix_(6,2) = 0; distanceMatrix_(6,3) = 2; distanceMatrix_(6,4) = -3; distanceMatrix_(6,5) = 2; distanceMatrix_(6,6) = 6; distanceMatrix_(6,7) = -3; distanceMatrix_(6,8) = 0; distanceMatrix_(6,9) = -4; distanceMatrix_(6,10) = -3; distanceMatrix_(6,11) = 1; distanceMatrix_(6,12) = -2; distanceMatrix_(6,13) = -3; distanceMatrix_(6,14) = -1; distanceMatrix_(6,15) = -1; distanceMatrix_(6,16) = -1; distanceMatrix_(6,17) = -3; distanceMatrix_(6,18) = -2; distanceMatrix_(6,19) = -3; distanceMatrix_(7,0) = 0; distanceMatrix_(7,1) = -3; distanceMatrix_(7,2) = 0; distanceMatrix_(7,3) = -1; distanceMatrix_(7,4) = -3; distanceMatrix_(7,5) = -2; distanceMatrix_(7,6) = -3; distanceMatrix_(7,7) = 8; distanceMatrix_(7,8) = -2; distanceMatrix_(7,9) = -4; distanceMatrix_(7,10) = -4; distanceMatrix_(7,11) = -2; distanceMatrix_(7,12) = -3; distanceMatrix_(7,13) = -4; distanceMatrix_(7,14) = -2; distanceMatrix_(7,15) = 0; distanceMatrix_(7,16) = -2; distanceMatrix_(7,17) = -3; distanceMatrix_(7,18) = -3; distanceMatrix_(7,19) = -4; distanceMatrix_(8,0) = -2; distanceMatrix_(8,1) = 0; distanceMatrix_(8,2) = 1; distanceMatrix_(8,3) = -1; distanceMatrix_(8,4) = -3; distanceMatrix_(8,5) = 1; distanceMatrix_(8,6) = 0; distanceMatrix_(8,7) = -2; distanceMatrix_(8,8) = 10; distanceMatrix_(8,9) = -4; distanceMatrix_(8,10) = -3; distanceMatrix_(8,11) = 0; distanceMatrix_(8,12) = -1; distanceMatrix_(8,13) = -1; distanceMatrix_(8,14) = -2; distanceMatrix_(8,15) = -1; distanceMatrix_(8,16) = -2; distanceMatrix_(8,17) = -3; 
distanceMatrix_(8,18) = 2; distanceMatrix_(8,19) = -4; distanceMatrix_(9,0) = -1; distanceMatrix_(9,1) = -4; distanceMatrix_(9,2) = -3; distanceMatrix_(9,3) = -4; distanceMatrix_(9,4) = -2; distanceMatrix_(9,5) = -3; distanceMatrix_(9,6) = -4; distanceMatrix_(9,7) = -4; distanceMatrix_(9,8) = -4; distanceMatrix_(9,9) = 5; distanceMatrix_(9,10) = 2; distanceMatrix_(9,11) = -3; distanceMatrix_(9,12) = 2; distanceMatrix_(9,13) = 0; distanceMatrix_(9,14) = -3; distanceMatrix_(9,15) = -3; distanceMatrix_(9,16) = -1; distanceMatrix_(9,17) = -3; distanceMatrix_(9,18) = -1; distanceMatrix_(9,19) = 4; distanceMatrix_(10,0) = -2; distanceMatrix_(10,1) = -3; distanceMatrix_(10,2) = -4; distanceMatrix_(10,3) = -4; distanceMatrix_(10,4) = -2; distanceMatrix_(10,5) = -2; distanceMatrix_(10,6) = -3; distanceMatrix_(10,7) = -4; distanceMatrix_(10,8) = -3; distanceMatrix_(10,9) = 2; distanceMatrix_(10,10) = 5; distanceMatrix_(10,11) = -3; distanceMatrix_(10,12) = 3; distanceMatrix_(10,13) = 1; distanceMatrix_(10,14) = -4; distanceMatrix_(10,15) = -3; distanceMatrix_(10,16) = -1; distanceMatrix_(10,17) = -2; distanceMatrix_(10,18) = -1; distanceMatrix_(10,19) = 1; distanceMatrix_(11,0) = -1; distanceMatrix_(11,1) = 3; distanceMatrix_(11,2) = 0; distanceMatrix_(11,3) = -1; distanceMatrix_(11,4) = -3; distanceMatrix_(11,5) = 2; distanceMatrix_(11,6) = 1; distanceMatrix_(11,7) = -2; distanceMatrix_(11,8) = 0; distanceMatrix_(11,9) = -3; distanceMatrix_(11,10) = -3; distanceMatrix_(11,11) = 6; distanceMatrix_(11,12) = -2; distanceMatrix_(11,13) = -4; distanceMatrix_(11,14) = -1; distanceMatrix_(11,15) = 0; distanceMatrix_(11,16) = -1; distanceMatrix_(11,17) = -3; distanceMatrix_(11,18) = -2; distanceMatrix_(11,19) = -3; distanceMatrix_(12,0) = -1; distanceMatrix_(12,1) = -2; distanceMatrix_(12,2) = -2; distanceMatrix_(12,3) = -4; distanceMatrix_(12,4) = -2; distanceMatrix_(12,5) = 0; distanceMatrix_(12,6) = -2; distanceMatrix_(12,7) = -3; distanceMatrix_(12,8) = -1; 
distanceMatrix_(12,9) = 2; distanceMatrix_(12,10) = 3; distanceMatrix_(12,11) = -2; distanceMatrix_(12,12) = 7; distanceMatrix_(12,13) = 0; distanceMatrix_(12,14) = -3; distanceMatrix_(12,15) = -2; distanceMatrix_(12,16) = -1; distanceMatrix_(12,17) = -1; distanceMatrix_(12,18) = 0; distanceMatrix_(12,19) = 1; distanceMatrix_(13,0) = -3; distanceMatrix_(13,1) = -3; distanceMatrix_(13,2) = -4; distanceMatrix_(13,3) = -5; distanceMatrix_(13,4) = -2; distanceMatrix_(13,5) = -4; distanceMatrix_(13,6) = -3; distanceMatrix_(13,7) = -4; distanceMatrix_(13,8) = -1; distanceMatrix_(13,9) = 0; distanceMatrix_(13,10) = 1; distanceMatrix_(13,11) = -4; distanceMatrix_(13,12) = 0; distanceMatrix_(13,13) = 8; distanceMatrix_(13,14) = -4; distanceMatrix_(13,15) = -3; distanceMatrix_(13,16) = -2; distanceMatrix_(13,17) = 1; distanceMatrix_(13,18) = 4; distanceMatrix_(13,19) = -1; distanceMatrix_(14,0) = -1; distanceMatrix_(14,1) = -3; distanceMatrix_(14,2) = -2; distanceMatrix_(14,3) = -1; distanceMatrix_(14,4) = -4; distanceMatrix_(14,5) = -1; distanceMatrix_(14,6) = -1; distanceMatrix_(14,7) = -2; distanceMatrix_(14,8) = -2; distanceMatrix_(14,9) = -3; distanceMatrix_(14,10) = -4; distanceMatrix_(14,11) = -1; distanceMatrix_(14,12) = -3; distanceMatrix_(14,13) = -4; distanceMatrix_(14,14) = 10; distanceMatrix_(14,15) = -1; distanceMatrix_(14,16) = -1; distanceMatrix_(14,17) = -4; distanceMatrix_(14,18) = -3; distanceMatrix_(14,19) = -3; distanceMatrix_(15,0) = 1; distanceMatrix_(15,1) = -1; distanceMatrix_(15,2) = 1; distanceMatrix_(15,3) = 0; distanceMatrix_(15,4) = -1; distanceMatrix_(15,5) = 0; distanceMatrix_(15,6) = -1; distanceMatrix_(15,7) = 0; distanceMatrix_(15,8) = -1; distanceMatrix_(15,9) = -3; distanceMatrix_(15,10) = -3; distanceMatrix_(15,11) = 0; distanceMatrix_(15,12) = -2; distanceMatrix_(15,13) = -3; distanceMatrix_(15,14) = -1; distanceMatrix_(15,15) = 5; distanceMatrix_(15,16) = 2; distanceMatrix_(15,17) = -4; distanceMatrix_(15,18) = -2; 
distanceMatrix_(15,19) = -2; distanceMatrix_(16,0) = 0; distanceMatrix_(16,1) = -1; distanceMatrix_(16,2) = 0; distanceMatrix_(16,3) = -1; distanceMatrix_(16,4) = -1; distanceMatrix_(16,5) = -1; distanceMatrix_(16,6) = -1; distanceMatrix_(16,7) = -2; distanceMatrix_(16,8) = -2; distanceMatrix_(16,9) = -1; distanceMatrix_(16,10) = -1; distanceMatrix_(16,11) = -1; distanceMatrix_(16,12) = -1; distanceMatrix_(16,13) = -2; distanceMatrix_(16,14) = -1; distanceMatrix_(16,15) = 2; distanceMatrix_(16,16) = 5; distanceMatrix_(16,17) = -3; distanceMatrix_(16,18) = -2; distanceMatrix_(16,19) = 0; distanceMatrix_(17,0) = -3; distanceMatrix_(17,1) = -3; distanceMatrix_(17,2) = -4; distanceMatrix_(17,3) = -5; distanceMatrix_(17,4) = -5; distanceMatrix_(17,5) = -1; distanceMatrix_(17,6) = -3; distanceMatrix_(17,7) = -3; distanceMatrix_(17,8) = -3; distanceMatrix_(17,9) = -3; distanceMatrix_(17,10) = -2; distanceMatrix_(17,11) = -3; distanceMatrix_(17,12) = -1; distanceMatrix_(17,13) = 1; distanceMatrix_(17,14) = -4; distanceMatrix_(17,15) = -4; distanceMatrix_(17,16) = -3; distanceMatrix_(17,17) = 15; distanceMatrix_(17,18) = 2; distanceMatrix_(17,19) = -3; distanceMatrix_(18,0) = -2; distanceMatrix_(18,1) = -1; distanceMatrix_(18,2) = -2; distanceMatrix_(18,3) = -3; distanceMatrix_(18,4) = -3; distanceMatrix_(18,5) = -1; distanceMatrix_(18,6) = -2; distanceMatrix_(18,7) = -3; distanceMatrix_(18,8) = 2; distanceMatrix_(18,9) = -1; distanceMatrix_(18,10) = -1; distanceMatrix_(18,11) = -2; distanceMatrix_(18,12) = 0; distanceMatrix_(18,13) = 4; distanceMatrix_(18,14) = -3; distanceMatrix_(18,15) = -2; distanceMatrix_(18,16) = -2; distanceMatrix_(18,17) = 2; distanceMatrix_(18,18) = 8; distanceMatrix_(18,19) = -1; distanceMatrix_(19,0) = 0; distanceMatrix_(19,1) = -3; distanceMatrix_(19,2) = -3; distanceMatrix_(19,3) = -4; distanceMatrix_(19,4) = -1; distanceMatrix_(19,5) = -3; distanceMatrix_(19,6) = -3; distanceMatrix_(19,7) = -4; distanceMatrix_(19,8) = -4; distanceMatrix_(19,9) 
= 4; distanceMatrix_(19,10) = 1; distanceMatrix_(19,11) = -3; distanceMatrix_(19,12) = 1; distanceMatrix_(19,13) = -1; distanceMatrix_(19,14) = -3; distanceMatrix_(19,15) = -2; distanceMatrix_(19,16) = 0; distanceMatrix_(19,17) = -3; distanceMatrix_(19,18) = -1; distanceMatrix_(19,19) = 5; bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h000644 000000 000000 00000006771 12147656566 024104 0ustar00rootroot000000 000000 // // File: AAChouFasmanTurnIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AACHUFASMANTURNINDEX_H_ #define _AACHUFASMANTURNINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Turn score for the Chou-Fasman algorithm of secondary structure prediction, according to http://prowl.rockefeller.edu/aainfo/chou.htm * * */ class AAChouFasmanTurnIndex : public AlphabetIndex1 { private: std::vector turn_; public: AAChouFasmanTurnIndex() : turn_() { turn_.resize(20); turn_[ 0] = 66; // A turn_[ 1] = 95; // R turn_[ 2] = 156; // N turn_[ 3] = 146; // D turn_[ 4] = 119; // C turn_[ 5] = 98; // Q turn_[ 6] = 74; // E turn_[ 7] = 156; // G turn_[ 8] = 95; // H turn_[ 9] = 47; // I turn_[10] = 59; // L turn_[11] = 101; // K turn_[12] = 60; // M turn_[13] = 60; // F turn_[14] = 152; // P turn_[15] = 143; // S turn_[16] = 96; // T turn_[17] = 96; // W turn_[18] = 114; // Y turn_[19] = 50; // V } virtual ~AAChouFasmanTurnIndex() {} AAChouFasmanTurnIndex* clone() const { return new AAChouFasmanTurnIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanTurnIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return turn_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return turn_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(turn_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _AACHUFASMANTURNINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/SimpleScore.h000644 000000 000000 00000006341 12147656566 022233 0ustar00rootroot000000 000000 // // File: SimpleScore.h // Created by: Julien Dutheil // Created on: Fri May 04 09:35 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _SIMPLESCORE_H_ #define _SIMPLESCORE_H_ // from the STL: #include #include "AlphabetIndex2.h" #include "../Alphabet/Alphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief Simple Substitution Matrix, with match and mismatch penalties. */ class SimpleScore : public virtual AlphabetIndex2 { private: LinearMatrix distanceMatrix_; const Alphabet* alphabet_; public: /** * @brief Build a new simpleScore object. * * @param alphabet The alphabet to use. * @param match Matching score. * @param mismatch Mismatching penalty. */ SimpleScore(const Alphabet* alphabet, double match, double mismatch); SimpleScore(const SimpleScore& sc) : distanceMatrix_(sc.distanceMatrix_), alphabet_(sc.alphabet_) {} SimpleScore& operator=(const SimpleScore& sc) { distanceMatrix_ = sc.distanceMatrix_; alphabet_ = sc.alphabet_; return *this; } virtual ~SimpleScore() {} SimpleScore* clone() const { return new SimpleScore(*this); } public: /** * @name Methods from the AlphabetIndex2 interface. * * @{ */ double getIndex(int state1, int state2) const throw (BadIntException); double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException); const Alphabet* getAlphabet() const { return alphabet_; } LinearMatrix* getIndexMatrix() const; /** @} */ }; } // end of namespace bpp. #endif // _SIMPLESCORE_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h000644 000000 000000 00000007110 12147656566 024312 0ustar00rootroot000000 000000 // // File: AAChouFasmanAHelixIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _AACHUFASMANAHELIXINDEX_H_ #define _AACHUFASMANAHELIXINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief A-Helix score for the Chou-Fasman algorithm of secondary structure prediction, according to http://prowl.rockefeller.edu/aainfo/chou.htm * * */ class AAChouFasmanAHelixIndex : public AlphabetIndex1 { private: std::vector aHelix_; public: AAChouFasmanAHelixIndex() : aHelix_() { aHelix_.resize(20); aHelix_[ 0] = 142; // A aHelix_[ 1] = 98; // R aHelix_[ 2] = 67; // N aHelix_[ 3] = 101; // D aHelix_[ 4] = 70; // C aHelix_[ 5] = 111; // Q aHelix_[ 6] = 151; // E aHelix_[ 7] = 57; // G aHelix_[ 8] = 100; // H aHelix_[ 9] = 108; // I aHelix_[10] = 121; // L aHelix_[11] = 114; // K aHelix_[12] = 145; // M aHelix_[13] = 113; // F aHelix_[14] = 57; // P aHelix_[15] = 77; // S aHelix_[16] = 83; // T aHelix_[17] = 108; // W aHelix_[18] = 69; // Y aHelix_[19] = 106; // V } virtual ~AAChouFasmanAHelixIndex() {} AAChouFasmanAHelixIndex* clone() const { return new AAChouFasmanAHelixIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanAHelixIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return aHelix_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return aHelix_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(aHelix_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AACHUFASMANAHELIXINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h000644 000000 000000 00000006700 12147656566 022446 0ustar00rootroot000000 000000 // // File: AAVolumeIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _AAVOLUMEINDEX_H_ #define _AAVOLUMEINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Volume (Angström^3) of each amino acid, according to http://www.imb-jena.de/IMAGE_AA.html * * */ class AAVolumeIndex : public AlphabetIndex1 { private: std::vector volume_; public: AAVolumeIndex() : volume_() { volume_.resize(20); volume_[ 0] = 115; // A volume_[ 1] = 225; // R volume_[ 2] = 160; // N volume_[ 3] = 150; // D volume_[ 4] = 135; // C volume_[ 5] = 180; // Q volume_[ 6] = 190; // E volume_[ 7] = 75; // G volume_[ 8] = 195; // H volume_[ 9] = 175; // I volume_[10] = 170; // L volume_[11] = 200; // K volume_[12] = 185; // M volume_[13] = 210; // F volume_[14] = 145; // P volume_[15] = 115; // S volume_[16] = 140; // T volume_[17] = 255; // W volume_[18] = 230; // Y volume_[19] = 155; // V } virtual ~AAVolumeIndex() {} AAVolumeIndex* clone() const { return new AAVolumeIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAVolumeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return volume_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return volume_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(volume_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AAVOLUMEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h000644 000000 000000 00000007765 12147656566 024500 0ustar00rootroot000000 000000 // // File: GranthamAAPolarityIndex.h // Created by: Julien Dutheil // Created on: Tue Apr 21 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _GRANTHAMAAPOLARITYINDEX_H_ #define _GRANTHAMAAPOLARITYINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" namespace bpp { /** * @brief Polarity index used in Grantham (1974). * * @code * Database: AAindex1 * Entry: GRAR740102 * * H GRAR740102 * D Polarity (Grantham, 1974) * R LIT:2004143b PMID:4843792 * A Grantham, R. 
* T Amino acid difference formula to help explain protein evolution * J Science 185, 862-864 (1974) * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 8.1 10.5 11.6 13.0 5.5 10.5 12.3 9.0 10.4 5.2 * 4.9 11.3 5.7 5.2 8.0 9.2 8.6 5.4 6.2 5.9 * // * @endcode */ class GranthamAAPolarityIndex : public virtual AlphabetIndex1 { private: std::vector polarity_; public: GranthamAAPolarityIndex() : polarity_() { polarity_.resize(20); polarity_[ 0] = 8.1; // A polarity_[ 1] = 10.5; // R polarity_[ 2] = 11.6; // N polarity_[ 3] = 13.0; // D polarity_[ 4] = 5.5; // C polarity_[ 5] = 10.5; // Q polarity_[ 6] = 12.3; // E polarity_[ 7] = 9.0; // G polarity_[ 8] = 10.4; // H polarity_[ 9] = 5.2; // I polarity_[10] = 4.9; // L polarity_[11] = 11.3; // K polarity_[12] = 5.7; // M polarity_[13] = 5.2; // F polarity_[14] = 8.0; // P polarity_[15] = 9.2; // S polarity_[16] = 8.6; // T polarity_[17] = 5.4; // W polarity_[18] = 6.2; // Y polarity_[19] = 5.9; // V } virtual ~GranthamAAPolarityIndex() {} GranthamAAPolarityIndex* clone() const { return new GranthamAAPolarityIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "GranthamAAPolarityIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return polarity_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return polarity_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(polarity_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _GRANTHAMAAPOLARITYINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp000644 000000 000000 00000005730 12147656566 022716 0ustar00rootroot000000 000000 // // File: AAIndex1Entry.cpp // Created by: Julien Dutheil // Created on: Fri Jan 19 17:07 2007 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AAIndex1Entry.h" #include "../Alphabet/AlphabetTools.h" #include #include #include using namespace bpp; using namespace std; AAIndex1Entry::AAIndex1Entry(std::istream& input) throw (IOException) : property_(20), alpha_(&AlphabetTools::PROTEIN_ALPHABET) { // Parse entry: string line; bool ok = false; do { line = FileTools::getNextLine(input); if (line[0] == 'I') { string line1 = FileTools::getNextLine(input); string line2 = FileTools::getNextLine(input); StringTokenizer st1(line1, " "); StringTokenizer st2(line2, " "); if (st1.numberOfRemainingTokens() != 10 || st1.numberOfRemainingTokens() != 10) break; // Amino acids are in the same order in the AAIndex1 database than in the ProteicAlphabet class: for (unsigned int i = 0; i < 10; i++) { property_[i] = TextTools::toDouble(st1.nextToken()); } for (unsigned int i = 10; i < 20; i++) { property_[i] = TextTools::toDouble(st2.nextToken()); } // Jump to next entry... FileTools::getNextLine(input); ok = true; } } while (!ok); if (!ok) throw IOException("AAIndex1Entry: invalid AAIndex1 entry."); } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h000644 000000 000000 00000007150 12147656566 022442 0ustar00rootroot000000 000000 // // File: AASEASup30Index.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _AASEASUP30INDEX_H_ #define _AASEASUP30INDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Percentage of amino acids having a Solvent Exposed Area above 30 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm * * */ class AASEASup30Index : public AlphabetIndex1 { private: std::vector seaSup30_; public: AASEASup30Index() : seaSup30_() { seaSup30_.resize(20); seaSup30_[ 0] = 0.48; // A seaSup30_[ 1] = 0.84; // R seaSup30_[ 2] = 0.82; // N seaSup30_[ 3] = 0.81; // D seaSup30_[ 4] = 0.32; // C seaSup30_[ 5] = 0.81; // Q seaSup30_[ 6] = 0.93; // E seaSup30_[ 7] = 0.51; // G seaSup30_[ 8] = 0.66; // H seaSup30_[ 9] = 0.39; // I seaSup30_[10] = 0.41; // L seaSup30_[11] = 0.93; // K seaSup30_[12] = 0.44; // M seaSup30_[13] = 0.42; // F seaSup30_[14] = 0.78; // P seaSup30_[15] = 0.70; // S seaSup30_[16] = 0.71; // T seaSup30_[17] = 0.49; // W seaSup30_[18] = 0.67; // Y seaSup30_[19] = 0.40; // V } virtual ~AASEASup30Index() {} AASEASup30Index* clone() const { return new AASEASup30Index(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AASEASup30Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return seaSup30_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return seaSup30_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(seaSup30_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _AASEASUP30INDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h000644 000000 000000 00000007716 12147656566 024140 0ustar00rootroot000000 000000 // // File: GranthamAAVolumeIndex.h // Created by: Julien Dutheil // Created on: Tue Apr 21 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _GRANTHAMAAVOLUMEINDEX_H_ #define _GRANTHAMAAVOLUMEINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" // From the STL: #include namespace bpp { /** * @brief Volume index used in Grantham (1974). * * @code * Database: AAindex1 * Entry: GRAR740103 * * H GRAR740103 * D Volume (Grantham, 1974) * R LIT:2004143b PMID:4843792 * A Grantham, R. * T Amino acid difference formula to help explain protein evolution * J Science 185, 862-864 (1974) * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 31. 124. 56. 54. 55. 85. 83. 3. 96. 111. * 111. 119. 105. 132. 32.5 32. 61. 170. 136. 84. * // * @endcode */ class GranthamAAVolumeIndex : public virtual AlphabetIndex1 { private: std::vector volume_; public: GranthamAAVolumeIndex() : volume_() { volume_.resize(20); volume_[ 0] = 31.; // A volume_[ 1] = 124.; // R volume_[ 2] = 56.; // N volume_[ 3] = 54.; // D volume_[ 4] = 55.; // C volume_[ 5] = 85.; // Q volume_[ 6] = 83.; // E volume_[ 7] = 3.; // G volume_[ 8] = 96.; // H volume_[ 9] = 111.; // I volume_[10] = 111.; // L volume_[11] = 119.; // K volume_[12] = 105.; // M volume_[13] = 132.; // F volume_[14] = 32.5; // P volume_[15] = 32.; // S volume_[16] = 61.; // T volume_[17] = 170.; // W volume_[18] = 136.; // Y volume_[19] = 84.; // V } virtual ~GranthamAAVolumeIndex() {} GranthamAAVolumeIndex* clone() const { return new GranthamAAVolumeIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "GranthamAAVolumeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return volume_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return volume_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(volume_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _GRANTHAMAAVOLUMEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/__MiyataMatrixCode000644 000000 000000 00000027474 12147656566 023274 0ustar00rootroot000000 000000
// Table fragment that is textually #include'd into the body of the
// MiyataAAChemicalDistance constructor, where distanceMatrix_ is a 20 x 20
// matrix. It first assigns the diagonal (0) and the lower triangle
// (row > column, positive values), then the upper triangle (row < column,
// negative values). Consumers take the absolute value when a symmetric
// distance is requested.
distanceMatrix_(0,0) = 0; distanceMatrix_(1,0) = 2.92; distanceMatrix_(1,1) = 0; distanceMatrix_(2,0) = 1.78; distanceMatrix_(2,1) = 2.04; distanceMatrix_(2,2) = 0; distanceMatrix_(3,0) = 2.37; distanceMatrix_(3,1) = 2.34; distanceMatrix_(3,2) = 0.65; distanceMatrix_(3,3) = 0; distanceMatrix_(4,0) = 1.39; distanceMatrix_(4,1) = 3.06; distanceMatrix_(4,2) = 2.83; distanceMatrix_(4,3) = 3.48; distanceMatrix_(4,4) = 0; distanceMatrix_(5,0) = 1.92; distanceMatrix_(5,1) = 1.13; distanceMatrix_(5,2) = 0.99; distanceMatrix_(5,3) = 1.47; distanceMatrix_(5,4) = 2.48; distanceMatrix_(5,5) = 0; distanceMatrix_(6,0) = 2.46; distanceMatrix_(6,1) = 1.45; distanceMatrix_(6,2) = 0.85; distanceMatrix_(6,3) = 0.9; distanceMatrix_(6,4) = 3.26; distanceMatrix_(6,5) = 0.84; distanceMatrix_(6,6) = 0; distanceMatrix_(7,0) = 0.91; distanceMatrix_(7,1) = 3.58; distanceMatrix_(7,2) = 1.96; distanceMatrix_(7,3) = 2.37; distanceMatrix_(7,4) = 2.22; distanceMatrix_(7,5) = 2.48; distanceMatrix_(7,6) = 2.78; distanceMatrix_(7,7) = 0; distanceMatrix_(8,0) = 2.17; distanceMatrix_(8,1) = 0.82; distanceMatrix_(8,2) = 1.29; distanceMatrix_(8,3) = 1.72; distanceMatrix_(8,4) = 2.56; distanceMatrix_(8,5) = 0.32; distanceMatrix_(8,6) = 0.96; distanceMatrix_(8,7) = 2.78; distanceMatrix_(8,8) = 0; distanceMatrix_(9,0) = 2.69; distanceMatrix_(9,1) = 2.49; distanceMatrix_(9,2) = 3.37; distanceMatrix_(9,3) = 3.98; distanceMatrix_(9,4) = 1.63; distanceMatrix_(9,5) = 2.57; distanceMatrix_(9,6) = 3.39; distanceMatrix_(9,7) = 3.6; distanceMatrix_(9,8) = 2.45; distanceMatrix_(9,9) = 0; distanceMatrix_(10,0) = 2.76; distanceMatrix_(10,1) = 2.62; distanceMatrix_(10,2) = 3.49; distanceMatrix_(10,3) = 4.1; distanceMatrix_(10,4) = 1.65; distanceMatrix_(10,5) = 2.7; distanceMatrix_(10,6) = 3.53; distanceMatrix_(10,7) = 3.67; distanceMatrix_(10,8) 
= 2.59; distanceMatrix_(10,9) = 0.14; distanceMatrix_(10,10) = 0; distanceMatrix_(11,0) = 2.96; distanceMatrix_(11,1) = 0.4; distanceMatrix_(11,2) = 1.84; distanceMatrix_(11,3) = 2.05; distanceMatrix_(11,4) = 3.27; distanceMatrix_(11,5) = 1.06; distanceMatrix_(11,6) = 1.14; distanceMatrix_(11,7) = 3.54; distanceMatrix_(11,8) = 0.79; distanceMatrix_(11,9) = 2.84; distanceMatrix_(11,10) = 2.98; distanceMatrix_(11,11) = 0; distanceMatrix_(12,0) = 2.42; distanceMatrix_(12,1) = 2.29; distanceMatrix_(12,2) = 3.08; distanceMatrix_(12,3) = 3.69; distanceMatrix_(12,4) = 1.46; distanceMatrix_(12,5) = 2.3; distanceMatrix_(12,6) = 3.13; distanceMatrix_(12,7) = 3.34; distanceMatrix_(12,8) = 2.19; distanceMatrix_(12,9) = 0.29; distanceMatrix_(12,10) = 0.41; distanceMatrix_(12,11) = 2.63; distanceMatrix_(12,12) = 0; distanceMatrix_(13,0) = 3.23; distanceMatrix_(13,1) = 2.47; distanceMatrix_(13,2) = 3.7; distanceMatrix_(13,3) = 4.27; distanceMatrix_(13,4) = 2.24; distanceMatrix_(13,5) = 2.81; distanceMatrix_(13,6) = 3.59; distanceMatrix_(13,7) = 4.14; distanceMatrix_(13,8) = 2.63; distanceMatrix_(13,9) = 0.61; distanceMatrix_(13,10) = 0.63; distanceMatrix_(13,11) = 2.85; distanceMatrix_(13,12) = 0.82; distanceMatrix_(13,13) = 0; distanceMatrix_(14,0) = 0.06; distanceMatrix_(14,1) = 2.9; distanceMatrix_(14,2) = 1.8; distanceMatrix_(14,3) = 2.4; distanceMatrix_(14,4) = 1.33; distanceMatrix_(14,5) = 1.92; distanceMatrix_(14,6) = 2.48; distanceMatrix_(14,7) = 0.97; distanceMatrix_(14,8) = 2.15; distanceMatrix_(14,9) = 2.62; distanceMatrix_(14,10) = 2.7; distanceMatrix_(14,11) = 2.94; distanceMatrix_(14,12) = 2.36; distanceMatrix_(14,13) = 3.17; distanceMatrix_(14,14) = 0; distanceMatrix_(15,0) = 0.51; distanceMatrix_(15,1) = 2.74; distanceMatrix_(15,2) = 1.31; distanceMatrix_(15,3) = 1.87; distanceMatrix_(15,4) = 1.84; distanceMatrix_(15,5) = 1.65; distanceMatrix_(15,6) = 2.06; distanceMatrix_(15,7) = 0.85; distanceMatrix_(15,8) = 1.94; distanceMatrix_(15,9) = 2.95; 
distanceMatrix_(15,10) = 3.04; distanceMatrix_(15,11) = 2.71; distanceMatrix_(15,12) = 2.67; distanceMatrix_(15,13) = 3.45; distanceMatrix_(15,14) = 0.56; distanceMatrix_(15,15) = 0; distanceMatrix_(16,0) = 0.9; distanceMatrix_(16,1) = 2.03; distanceMatrix_(16,2) = 1.4; distanceMatrix_(16,3) = 2.05; distanceMatrix_(16,4) = 1.45; distanceMatrix_(16,5) = 1.12; distanceMatrix_(16,6) = 1.83; distanceMatrix_(16,7) = 1.7; distanceMatrix_(16,8) = 1.32; distanceMatrix_(16,9) = 2.14; distanceMatrix_(16,10) = 2.25; distanceMatrix_(16,11) = 2.1; distanceMatrix_(16,12) = 1.86; distanceMatrix_(16,13) = 2.6; distanceMatrix_(16,14) = 0.87; distanceMatrix_(16,15) = 0.89; distanceMatrix_(16,16) = 0; distanceMatrix_(17,0) = 4.23; distanceMatrix_(17,1) = 2.72; distanceMatrix_(17,2) = 4.39; distanceMatrix_(17,3) = 4.88; distanceMatrix_(17,4) = 3.34; distanceMatrix_(17,5) = 3.42; distanceMatrix_(17,6) = 4.08; distanceMatrix_(17,7) = 5.13; distanceMatrix_(17,8) = 3.16; distanceMatrix_(17,9) = 1.72; distanceMatrix_(17,10) = 1.73; distanceMatrix_(17,11) = 3.11; distanceMatrix_(17,12) = 1.89; distanceMatrix_(17,13) = 1.11; distanceMatrix_(17,14) = 4.17; distanceMatrix_(17,15) = 4.38; distanceMatrix_(17,16) = 3.5; distanceMatrix_(17,17) = 0; distanceMatrix_(18,0) = 3.18; distanceMatrix_(18,1) = 2.02; distanceMatrix_(18,2) = 3.42; distanceMatrix_(18,3) = 3.95; distanceMatrix_(18,4) = 2.38; distanceMatrix_(18,5) = 2.48; distanceMatrix_(18,6) = 3.22; distanceMatrix_(18,7) = 4.08; distanceMatrix_(18,8) = 2.27; distanceMatrix_(18,9) = 0.86; distanceMatrix_(18,10) = 0.94; distanceMatrix_(18,11) = 2.42; distanceMatrix_(18,12) = 0.93; distanceMatrix_(18,13) = 0.48; distanceMatrix_(18,14) = 3.12; distanceMatrix_(18,15) = 3.33; distanceMatrix_(18,16) = 2.45; distanceMatrix_(18,17) = 1.06; distanceMatrix_(18,18) = 0; distanceMatrix_(19,0) = 1.85; distanceMatrix_(19,1) = 2.43; distanceMatrix_(19,2) = 2.76; distanceMatrix_(19,3) = 3.4; distanceMatrix_(19,4) = 0.86; distanceMatrix_(19,5) = 2.13; 
distanceMatrix_(19,6) = 2.97; distanceMatrix_(19,7) = 2.76; distanceMatrix_(19,8) = 2.11; distanceMatrix_(19,9) = 0.85; distanceMatrix_(19,10) = 0.91; distanceMatrix_(19,11) = 2.7; distanceMatrix_(19,12) = 0.62; distanceMatrix_(19,13) = 1.43; distanceMatrix_(19,14) = 1.79; distanceMatrix_(19,15) = 2.15; distanceMatrix_(19,16) = 1.42; distanceMatrix_(19,17) = 2.51; distanceMatrix_(19,18) = 1.52; distanceMatrix_(19,19) = 0; distanceMatrix_(0,1) = -2.92; distanceMatrix_(0,2) = -1.78; distanceMatrix_(0,3) = -2.37; distanceMatrix_(0,4) = -1.39; distanceMatrix_(0,5) = -1.92; distanceMatrix_(0,6) = -2.46; distanceMatrix_(0,7) = -0.91; distanceMatrix_(0,8) = -2.17; distanceMatrix_(0,9) = -2.69; distanceMatrix_(0,10) = -2.76; distanceMatrix_(0,11) = -2.96; distanceMatrix_(0,12) = -2.42; distanceMatrix_(0,13) = -3.23; distanceMatrix_(0,14) = -0.06; distanceMatrix_(0,15) = -0.51; distanceMatrix_(0,16) = -0.9; distanceMatrix_(0,17) = -4.23; distanceMatrix_(0,18) = -3.18; distanceMatrix_(0,19) = -1.85; distanceMatrix_(1,2) = -2.04; distanceMatrix_(1,3) = -2.34; distanceMatrix_(1,4) = -3.06; distanceMatrix_(1,5) = -1.13; distanceMatrix_(1,6) = -1.45; distanceMatrix_(1,7) = -3.58; distanceMatrix_(1,8) = -0.82; distanceMatrix_(1,9) = -2.49; distanceMatrix_(1,10) = -2.62; distanceMatrix_(1,11) = -0.4; distanceMatrix_(1,12) = -2.29; distanceMatrix_(1,13) = -2.47; distanceMatrix_(1,14) = -2.9; distanceMatrix_(1,15) = -2.74; distanceMatrix_(1,16) = -2.03; distanceMatrix_(1,17) = -2.72; distanceMatrix_(1,18) = -2.02; distanceMatrix_(1,19) = -2.43; distanceMatrix_(2,3) = -0.65; distanceMatrix_(2,4) = -2.83; distanceMatrix_(2,5) = -0.99; distanceMatrix_(2,6) = -0.85; distanceMatrix_(2,7) = -1.96; distanceMatrix_(2,8) = -1.29; distanceMatrix_(2,9) = -3.37; distanceMatrix_(2,10) = -3.49; distanceMatrix_(2,11) = -1.84; distanceMatrix_(2,12) = -3.08; distanceMatrix_(2,13) = -3.7; distanceMatrix_(2,14) = -1.8; distanceMatrix_(2,15) = -1.31; distanceMatrix_(2,16) = -1.4; distanceMatrix_(2,17) 
= -4.39; distanceMatrix_(2,18) = -3.42; distanceMatrix_(2,19) = -2.76; distanceMatrix_(3,4) = -3.48; distanceMatrix_(3,5) = -1.47; distanceMatrix_(3,6) = -0.9; distanceMatrix_(3,7) = -2.37; distanceMatrix_(3,8) = -1.72; distanceMatrix_(3,9) = -3.98; distanceMatrix_(3,10) = -4.1; distanceMatrix_(3,11) = -2.05; distanceMatrix_(3,12) = -3.69; distanceMatrix_(3,13) = -4.27; distanceMatrix_(3,14) = -2.4; distanceMatrix_(3,15) = -1.87; distanceMatrix_(3,16) = -2.05; distanceMatrix_(3,17) = -4.88; distanceMatrix_(3,18) = -3.95; distanceMatrix_(3,19) = -3.4; distanceMatrix_(4,5) = -2.48; distanceMatrix_(4,6) = -3.26; distanceMatrix_(4,7) = -2.22; distanceMatrix_(4,8) = -2.56; distanceMatrix_(4,9) = -1.63; distanceMatrix_(4,10) = -1.65; distanceMatrix_(4,11) = -3.27; distanceMatrix_(4,12) = -1.46; distanceMatrix_(4,13) = -2.24; distanceMatrix_(4,14) = -1.33; distanceMatrix_(4,15) = -1.84; distanceMatrix_(4,16) = -1.45; distanceMatrix_(4,17) = -3.34; distanceMatrix_(4,18) = -2.38; distanceMatrix_(4,19) = -0.86; distanceMatrix_(5,6) = -0.84; distanceMatrix_(5,7) = -2.48; distanceMatrix_(5,8) = -0.32; distanceMatrix_(5,9) = -2.57; distanceMatrix_(5,10) = -2.7; distanceMatrix_(5,11) = -1.06; distanceMatrix_(5,12) = -2.3; distanceMatrix_(5,13) = -2.81; distanceMatrix_(5,14) = -1.92; distanceMatrix_(5,15) = -1.65; distanceMatrix_(5,16) = -1.12; distanceMatrix_(5,17) = -3.42; distanceMatrix_(5,18) = -2.48; distanceMatrix_(5,19) = -2.13; distanceMatrix_(6,7) = -2.78; distanceMatrix_(6,8) = -0.96; distanceMatrix_(6,9) = -3.39; distanceMatrix_(6,10) = -3.53; distanceMatrix_(6,11) = -1.14; distanceMatrix_(6,12) = -3.13; distanceMatrix_(6,13) = -3.59; distanceMatrix_(6,14) = -2.48; distanceMatrix_(6,15) = -2.06; distanceMatrix_(6,16) = -1.83; distanceMatrix_(6,17) = -4.08; distanceMatrix_(6,18) = -3.22; distanceMatrix_(6,19) = -2.97; distanceMatrix_(7,8) = -2.78; distanceMatrix_(7,9) = -3.6; distanceMatrix_(7,10) = -3.67; distanceMatrix_(7,11) = -3.54; distanceMatrix_(7,12) = -3.34; 
distanceMatrix_(7,13) = -4.14; distanceMatrix_(7,14) = -0.97; distanceMatrix_(7,15) = -0.85; distanceMatrix_(7,16) = -1.7; distanceMatrix_(7,17) = -5.13; distanceMatrix_(7,18) = -4.08; distanceMatrix_(7,19) = -2.76; distanceMatrix_(8,9) = -2.45; distanceMatrix_(8,10) = -2.59; distanceMatrix_(8,11) = -0.79; distanceMatrix_(8,12) = -2.19; distanceMatrix_(8,13) = -2.63; distanceMatrix_(8,14) = -2.15; distanceMatrix_(8,15) = -1.94; distanceMatrix_(8,16) = -1.32; distanceMatrix_(8,17) = -3.16; distanceMatrix_(8,18) = -2.27; distanceMatrix_(8,19) = -2.11; distanceMatrix_(9,10) = -0.14; distanceMatrix_(9,11) = -2.84; distanceMatrix_(9,12) = -0.29; distanceMatrix_(9,13) = -0.61; distanceMatrix_(9,14) = -2.62; distanceMatrix_(9,15) = -2.95; distanceMatrix_(9,16) = -2.14; distanceMatrix_(9,17) = -1.72; distanceMatrix_(9,18) = -0.86; distanceMatrix_(9,19) = -0.85; distanceMatrix_(10,11) = -2.98; distanceMatrix_(10,12) = -0.41; distanceMatrix_(10,13) = -0.63; distanceMatrix_(10,14) = -2.7; distanceMatrix_(10,15) = -3.04; distanceMatrix_(10,16) = -2.25; distanceMatrix_(10,17) = -1.73; distanceMatrix_(10,18) = -0.94; distanceMatrix_(10,19) = -0.91; distanceMatrix_(11,12) = -2.63; distanceMatrix_(11,13) = -2.85; distanceMatrix_(11,14) = -2.94; distanceMatrix_(11,15) = -2.71; distanceMatrix_(11,16) = -2.1; distanceMatrix_(11,17) = -3.11; distanceMatrix_(11,18) = -2.42; distanceMatrix_(11,19) = -2.7; distanceMatrix_(12,13) = -0.82; distanceMatrix_(12,14) = -2.36; distanceMatrix_(12,15) = -2.67; distanceMatrix_(12,16) = -1.86; distanceMatrix_(12,17) = -1.89; distanceMatrix_(12,18) = -0.93; distanceMatrix_(12,19) = -0.62; distanceMatrix_(13,14) = -3.17; distanceMatrix_(13,15) = -3.45; distanceMatrix_(13,16) = -2.6; distanceMatrix_(13,17) = -1.11; distanceMatrix_(13,18) = -0.48; distanceMatrix_(13,19) = -1.43; distanceMatrix_(14,15) = -0.56; distanceMatrix_(14,16) = -0.87; distanceMatrix_(14,17) = -4.17; distanceMatrix_(14,18) = -3.12; distanceMatrix_(14,19) = -1.79; 
distanceMatrix_(15,16) = -0.89; distanceMatrix_(15,17) = -4.38; distanceMatrix_(15,18) = -3.33; distanceMatrix_(15,19) = -2.15; distanceMatrix_(16,17) = -3.5; distanceMatrix_(16,18) = -2.45; distanceMatrix_(16,19) = -1.42; distanceMatrix_(17,18) = -1.06; distanceMatrix_(17,19) = -2.51; distanceMatrix_(18,19) = -1.52; bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h000644 000000 000000 00000006320 12147656566 022357 0ustar00rootroot000000 000000 // // File: AAIndex1Entry.h // Created by: Julien Dutheil // Created on: Fri Jan 19 17:07 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AAINDEX1ENTRY_H_ #define _AAINDEX1ENTRY_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" namespace bpp { /** * @brief Create a AlphabetIndex1 object from an AAIndex2 entry. */ class AAIndex1Entry : public AlphabetIndex1 { private: std::vector property_; const ProteicAlphabet* alpha_; public: /** * @brief Create a new AAIndex1Entry from an input stream. * * @param input The input stream to use. * @throw IOException if the stream content does not follow the AAIndex1 database entry format. */ AAIndex1Entry(std::istream& input) throw (IOException); AAIndex1Entry(const AAIndex1Entry& index) : property_(index.property_), alpha_(index.alpha_) {} AAIndex1Entry& operator=(const AAIndex1Entry& index) { property_ = index.property_; alpha_ = index.alpha_; return *this; } virtual ~AAIndex1Entry() {} AAIndex1Entry* clone() const { return new AAIndex1Entry(*this); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "KleinAANetChargeIndex::getIndex(). Invalid state.", alpha_); return property_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return property_[alpha_->charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(property_); } const Alphabet* getAlphabet() const { return alpha_; } }; } // end of namespace bpp. #endif // _AAINDEX1ENTRY_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h000644 000000 000000 00000007356 12147656566 022372 0ustar00rootroot000000 000000 // // File: AAIndex2Entry.h // Created by: Julien Dutheil // Created on: Fri Jan 19 17:07 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AAINDEX2ENTRY_H_ #define _AAINDEX2ENTRY_H_ #include "AlphabetIndex2.h" #include "../Alphabet/ProteicAlphabet.h" #include namespace bpp { /** * @brief Create a AlphabetIndex2 object from an AAIndex2 entry. */ class AAIndex2Entry : public virtual AlphabetIndex2 { private: LinearMatrix property_; const ProteicAlphabet* alpha_; public: /** * @brief Create a new AAIndex2Entry from an input stream. * * @param input The input stream to use. * @param sym Tell if the matrix is symmetric. * This option as an effect only if the matrix is specified as a triangle in the entry. 
* If sym==true, the oher triangle will be built by symmetry. * If sym==false, the other triangle will be set to (-) the given triangle. * @throw IOException if the stream content does not follow the AAIndex2 database entry format. */ AAIndex2Entry(std::istream& input, bool sym = true) throw (IOException); AAIndex2Entry(const AAIndex2Entry& index) : property_(index.property_), alpha_(index.alpha_) {} AAIndex2Entry& operator=(const AAIndex2Entry& index) { property_ = index.property_; alpha_ = index.alpha_; return *this; } virtual ~AAIndex2Entry() {} public: const Alphabet* getAlphabet() const { return alpha_; } AAIndex2Entry* clone() const { return new AAIndex2Entry(*this); } double getIndex(int state1, int state2) const throw (BadIntException) { if (state1 < 0 || state1 > 19) throw BadIntException(state1, "AAIndex2Entry::getIndex(). Invalid state1.", alpha_); if (state2 < 0 || state2 > 19) throw BadIntException(state2, "AAIndex2Entry::getIndex(). Invalid state2.", alpha_); return property_(state1, state2); } double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { return property_(alpha_->charToInt(state1), alpha_->charToInt(state2)); } LinearMatrix* getIndexMatrix() const { return new LinearMatrix(property_); } }; } // end of namespace bpp. #endif // _AAINDEX2ENTRY_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h000644 000000 000000 00000007755 12147656566 024035 0ustar00rootroot000000 000000 // // File: KleinAANetChargeIndex.h // Created by: Julien Dutheil // Created on: Wed Aug 31 17:48 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _KLEINAANETCHARGEINDEX_H_ #define _KLEINAANETCHARGEINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" namespace bpp { /** * @brief Charge of each amino acid. * * @code * Database: AAindex * Entry: KLEP840101 * * H KLEP840101 * D Net charge (Klein et al., 1984) * R LIT:1008055 PMID:6547351 * A Klein, P., Kanehisa, M. and DeLisi, C. * T Prediction of protein function from sequence properties: Discriminant * analysis of a data base * J Biochim. Biophys. Acta 787, 221-226 (1984) * C ZIMJ680104 0.941 * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 0. 1. 0. -1. 0. 0. -1. 0. 0. 0. * 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 
* // * @endcode */ class KleinAANetChargeIndex : public AlphabetIndex1 { private: std::vector charge_; public: KleinAANetChargeIndex() : charge_() { charge_.resize(20); charge_[ 0] = 0.; // A charge_[ 1] = 1.; // R charge_[ 2] = 0.; // N charge_[ 3] = -1.; // D charge_[ 4] = 0.; // C charge_[ 5] = 0.; // Q charge_[ 6] = -1.; // E charge_[ 7] = 0.; // G charge_[ 8] = 0.; // H charge_[ 9] = 0.; // I charge_[10] = 0.; // L charge_[11] = 1.; // K charge_[12] = 0.; // M charge_[13] = 0.; // F charge_[14] = 0.; // P charge_[15] = 0.; // S charge_[16] = 0.; // T charge_[17] = 0.; // W charge_[18] = 0.; // Y charge_[19] = 0.; // V } virtual ~KleinAANetChargeIndex() {} KleinAANetChargeIndex* clone() const { return new KleinAANetChargeIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "KleinAANetChargeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return charge_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return charge_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(charge_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _KLEINAANETCHARGEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h000644 000000 000000 00000010173 12147656566 023700 0ustar00rootroot000000 000000 // // File: SimpleIndexDistance.h // Created by: Julien Dutheil // Created on: Tue Apr 21 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _SIMPLEINDEXDISTANCE_H_ #define _SIMPLEINDEXDISTANCE_H_ // from the STL: #include #include "AlphabetIndex1.h" #include "AlphabetIndex2.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief Simple dissimilarity distance. * * Take a one-dimensional index end return the difference between the * indexes of two states. 
*/ class SimpleIndexDistance : public virtual AlphabetIndex2 { private: std::auto_ptr index_; bool sym_; public: SimpleIndexDistance(AlphabetIndex1* index) : index_(index), sym_(false) {} SimpleIndexDistance(const SimpleIndexDistance& sid) : index_(dynamic_cast(sid.index_->clone())), sym_(sid.sym_) {} SimpleIndexDistance& operator=(const SimpleIndexDistance& sid) { index_.reset(dynamic_cast(sid.index_->clone())); sym_ = sid.sym_; return *this; } virtual ~SimpleIndexDistance() {} public: double getIndex(int state1, int state2) const throw (BadIntException) { double d = index_->getIndex(state2) - index_->getIndex(state1); return sym_ ? NumTools::abs(d) : d; } double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { double d = index_->getIndex(state2) - index_->getIndex(state1); return sym_ ? NumTools::abs(d) : d; } const Alphabet* getAlphabet() const { return index_->getAlphabet(); } Clonable* clone() const { return new SimpleIndexDistance(*this); } Matrix* getIndexMatrix() const { size_t n = index_->getAlphabet()->getSize(); //We should change to "supported ints" there... RowMatrix* m = new RowMatrix(n, n); for (int i = 0; i < static_cast(n); i++) { for (int j = 0; j < static_cast(n); j++) { (*m)(i, j) = getIndex(i, j); } } return m; } public: void setSymmetric(bool yn) { sym_ = yn; } bool isSymmetric() const { return sym_; } /** * @return The AlphabetIndex1 object associated to this object. */ const AlphabetIndex1& getAlphabetIndex1() const { return *index_; } /** * @return The AlphabetIndex1 object associated to this object. */ AlphabetIndex1& getAlphabetIndex1() { return *index_; } }; } // end of namespace bpp. 
#endif // _SIMPLEINDEXDISTANCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp000644 000000 000000 00000006124 12147656566 025067 0ustar00rootroot000000 000000 // // File: MiyataAAChemicalDistance.cpp // Created by: Julien Dutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ // from the STL: #include using namespace std; #include "MiyataAAChemicalDistance.h" #include "../Alphabet/AlphabetTools.h" #include using namespace bpp; MiyataAAChemicalDistance::MiyataAAChemicalDistance() : distanceMatrix_(20, 20), alpha_(&AlphabetTools::PROTEIN_ALPHABET), sym_(true) { #include "__MiyataMatrixCode" } double MiyataAAChemicalDistance::getIndex(int state1, int state2) const throw (BadIntException) { if (state1 < 0 || state1 > 19) throw BadIntException(state1, "MiyataAAChemicalDistance::getIndex(). Invalid state1.", alpha_); if (state2 < 0 || state2 > 19) throw BadIntException(state2, "MiyataAAChemicalDistance::getIndex(). Invalid state2.", alpha_); double d = distanceMatrix_(state1, state2); return sym_ ? NumTools::abs(d) : d; } double MiyataAAChemicalDistance::getIndex(const string& state1, const string& state2) const throw (BadCharException) { double d = distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2)); return sym_ ? NumTools::abs(d) : d; } Matrix* MiyataAAChemicalDistance::getIndexMatrix() const { RowMatrix* m = new RowMatrix(distanceMatrix_); if (sym_) { for (unsigned int i = 0; i < 20; i++) { for (unsigned int j = 0; j < 20; j++) { (*m)(i, j) = NumTools::abs((*m)(i, j)); } } } return m; } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp000644 000000 000000 00000007336 12147656566 024742 0ustar00rootroot000000 000000 // // File: DefaultNucleotideScore.cpp // Created by: Julien Dutheil // Created on: Fri Jan 19 10:30 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DefaultNucleotideScore.h" // from the STL: #include using namespace std; using namespace bpp; DefaultNucleotideScore::DefaultNucleotideScore(const NucleicAlphabet* alphabet) : distanceMatrix_(4, 4), alpha_(alphabet) { // Load the matrix: distanceMatrix_(0, 0) = 10; distanceMatrix_(0, 1) = -3; distanceMatrix_(0, 2) = -1; distanceMatrix_(0, 3) = -4; distanceMatrix_(1, 0) = -3; distanceMatrix_(1, 1) = 9; distanceMatrix_(1, 2) = -5; distanceMatrix_(1, 3) = 0; distanceMatrix_(2, 0) = -1; distanceMatrix_(2, 1) = -5; distanceMatrix_(2, 2) = 7; distanceMatrix_(2, 3) = -3; distanceMatrix_(3, 0) = -4; distanceMatrix_(3, 1) = 0; distanceMatrix_(3, 2) = -3; distanceMatrix_(3, 3) = 8; } double DefaultNucleotideScore::getIndex(int state1, int state2) const throw (BadIntException) { if (alpha_->isGap(state1) || !alpha_->isIntInAlphabet(state1)) throw BadIntException(state1, "DefaultNucleotideScore::getIndex(). Invalid state1.", alpha_); if (alpha_->isGap(state2) || !alpha_->isIntInAlphabet(state2)) throw BadIntException(state2, "DefaultNucleotideScore::getIndex(). 
Invalid state1.", alpha_); if (!alpha_->isUnresolved(state1) && !alpha_->isUnresolved(state2)) return distanceMatrix_(state1, state2); vector states1 = alpha_->getAlias(state1); vector states2 = alpha_->getAlias(state2); double score = -5; double tmp_score; for (size_t i = 0; i < states1.size(); i++) { for (size_t j = 0; j < states2.size(); j++) { tmp_score = getIndex(states1[i], states2[j]); if (tmp_score > score) score = tmp_score; } } return score / static_cast(states1.size() + states2.size() - 1); } double DefaultNucleotideScore::getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { return distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2)); } LinearMatrix* DefaultNucleotideScore::getIndexMatrix() const { return new LinearMatrix(distanceMatrix_); } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h000644 000000 000000 00000011716 12147656566 022373 0ustar00rootroot000000 000000 // // File: AAChargeIndex.h // Created by: Julien Dutheil // Created on: Tue May 02 13:34 2006 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AACHARGEINDEX_H_ #define _AACHARGEINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Charge of each amino acid. * * @code * Database: AAindex1 * Entry: FAUJ880111 * * H FAUJ880111 * D Positive charge (Fauchere et al., 1988) * R LIT:1414114 PMID:3209351 * A Fauchere, J.L., Charton, M., Kier, L.B., Verloop, A. and Pliska, V. * T Amino acid side chain parameters for correlation studies in biology and * pharmacology * J Int. J. Peptide Protein Res. 32, 269-278 (1988) * C ZIMJ680104 0.813 * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. * 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. * // * @endcode * * @code * Database: AAindex1 * Entry: FAUJ880111 * * H FAUJ880112 * D Negative charge (Fauchere et al., 1988) * R LIT:1414114 PMID:3209351 * A Fauchere, J.L., Charton, M., Kier, L.B., Verloop, A. and Pliska, V. * T Amino acid side chain parameters for correlation studies in biology and * pharmacology * J Int. J. Peptide Protein Res. 32, 269-278 (1988) * C RICJ880106 0.849 * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. * 0. 0. 0. 0. 0. 0. 0. 0. 0. 
0. * Soit: * // * @endcode * * Hence, combining the two: * @code * I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V * 0. 1. 0. -1. 0. 0. -1. 0. 1. 0. * 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. * @endcode */ class AAChargeIndex : public AlphabetIndex1 { private: std::vector charge_; public: AAChargeIndex() : charge_() { charge_.resize(20); charge_[ 0] = 0.; // A charge_[ 1] = 1.; // R charge_[ 2] = 0.; // N charge_[ 3] = -1.; // D charge_[ 4] = 0.; // C charge_[ 5] = 0.; // Q charge_[ 6] = -1.; // E charge_[ 7] = 0.; // G charge_[ 8] = 1.; // H charge_[ 9] = 0.; // I charge_[10] = 0.; // L charge_[11] = 1.; // K charge_[12] = 0.; // M charge_[13] = 0.; // F charge_[14] = 0.; // P charge_[15] = 0.; // S charge_[16] = 0.; // T charge_[17] = 0.; // W charge_[18] = 0.; // Y charge_[19] = 0.; // V } virtual ~AAChargeIndex() {} AAChargeIndex* clone() const { return new AAChargeIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAChargeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return charge_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return charge_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(charge_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _AACHARGEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/__GranthamMatrixCode000644 000000 000000 00000053140 12147656566 023576 0ustar00rootroot000000 000000 //library(seqinr) //data(aaindex) distanceMatrix_(0,0) = 0; distanceMatrix_(1,0) = 112; distanceMatrix_(1,1) = 0; distanceMatrix_(2,0) = 111; distanceMatrix_(2,1) = 86; distanceMatrix_(2,2) = 0; distanceMatrix_(3,0) = 126; distanceMatrix_(3,1) = 96; distanceMatrix_(3,2) = 23; distanceMatrix_(3,3) = 0; distanceMatrix_(4,0) = 195; distanceMatrix_(4,1) = 180; distanceMatrix_(4,2) = 139; distanceMatrix_(4,3) = 154; distanceMatrix_(4,4) = 0; distanceMatrix_(5,0) = 91; distanceMatrix_(5,1) = 43; distanceMatrix_(5,2) = 46; distanceMatrix_(5,3) = 61; distanceMatrix_(5,4) = 154; distanceMatrix_(5,5) = 0; distanceMatrix_(6,0) = 107; distanceMatrix_(6,1) = 54; distanceMatrix_(6,2) = 42; distanceMatrix_(6,3) = 45; distanceMatrix_(6,4) = 170; distanceMatrix_(6,5) = 29; distanceMatrix_(6,6) = 0; distanceMatrix_(7,0) = 60; distanceMatrix_(7,1) = 125; distanceMatrix_(7,2) = 80; distanceMatrix_(7,3) = 94; distanceMatrix_(7,4) = 159; distanceMatrix_(7,5) = 87; distanceMatrix_(7,6) = 98; distanceMatrix_(7,7) = 0; distanceMatrix_(8,0) = 86; distanceMatrix_(8,1) = 29; distanceMatrix_(8,2) = 68; distanceMatrix_(8,3) = 81; distanceMatrix_(8,4) = 174; distanceMatrix_(8,5) = 24; distanceMatrix_(8,6) = 40; distanceMatrix_(8,7) = 98; distanceMatrix_(8,8) = 0; distanceMatrix_(9,0) = 94; distanceMatrix_(9,1) = 97; distanceMatrix_(9,2) = 149; distanceMatrix_(9,3) = 168; distanceMatrix_(9,4) = 198; distanceMatrix_(9,5) = 109; distanceMatrix_(9,6) = 134; distanceMatrix_(9,7) = 135; distanceMatrix_(9,8) = 94; distanceMatrix_(9,9) = 0; distanceMatrix_(10,0) = 96; distanceMatrix_(10,1) = 102; distanceMatrix_(10,2) = 153; distanceMatrix_(10,3) = 172; distanceMatrix_(10,4) = 198; distanceMatrix_(10,5) = 113; distanceMatrix_(10,6) = 138; distanceMatrix_(10,7) = 138; distanceMatrix_(10,8) = 99; distanceMatrix_(10,9) = 5; 
distanceMatrix_(10,10) = 0; distanceMatrix_(11,0) = 106; distanceMatrix_(11,1) = 26; distanceMatrix_(11,2) = 94; distanceMatrix_(11,3) = 101; distanceMatrix_(11,4) = 202; distanceMatrix_(11,5) = 53; distanceMatrix_(11,6) = 56; distanceMatrix_(11,7) = 127; distanceMatrix_(11,8) = 32; distanceMatrix_(11,9) = 102; distanceMatrix_(11,10) = 107; distanceMatrix_(11,11) = 0; distanceMatrix_(12,0) = 84; distanceMatrix_(12,1) = 91; distanceMatrix_(12,2) = 142; distanceMatrix_(12,3) = 160; distanceMatrix_(12,4) = 196; distanceMatrix_(12,5) = 101; distanceMatrix_(12,6) = 126; distanceMatrix_(12,7) = 127; distanceMatrix_(12,8) = 87; distanceMatrix_(12,9) = 10; distanceMatrix_(12,10) = 15; distanceMatrix_(12,11) = 95; distanceMatrix_(12,12) = 0; distanceMatrix_(13,0) = 113; distanceMatrix_(13,1) = 97; distanceMatrix_(13,2) = 158; distanceMatrix_(13,3) = 177; distanceMatrix_(13,4) = 205; distanceMatrix_(13,5) = 116; distanceMatrix_(13,6) = 140; distanceMatrix_(13,7) = 153; distanceMatrix_(13,8) = 100; distanceMatrix_(13,9) = 21; distanceMatrix_(13,10) = 22; distanceMatrix_(13,11) = 102; distanceMatrix_(13,12) = 28; distanceMatrix_(13,13) = 0; distanceMatrix_(14,0) = 27; distanceMatrix_(14,1) = 103; distanceMatrix_(14,2) = 91; distanceMatrix_(14,3) = 108; distanceMatrix_(14,4) = 169; distanceMatrix_(14,5) = 76; distanceMatrix_(14,6) = 93; distanceMatrix_(14,7) = 42; distanceMatrix_(14,8) = 77; distanceMatrix_(14,9) = 95; distanceMatrix_(14,10) = 98; distanceMatrix_(14,11) = 103; distanceMatrix_(14,12) = 87; distanceMatrix_(14,13) = 114; distanceMatrix_(14,14) = 0; distanceMatrix_(15,0) = 99; distanceMatrix_(15,1) = 110; distanceMatrix_(15,2) = 46; distanceMatrix_(15,3) = 65; distanceMatrix_(15,4) = 112; distanceMatrix_(15,5) = 68; distanceMatrix_(15,6) = 80; distanceMatrix_(15,7) = 56; distanceMatrix_(15,8) = 89; distanceMatrix_(15,9) = 142; distanceMatrix_(15,10) = 145; distanceMatrix_(15,11) = 121; distanceMatrix_(15,12) = 135; distanceMatrix_(15,13) = 155; 
distanceMatrix_(15,14) = 74; distanceMatrix_(15,15) = 0; distanceMatrix_(16,0) = 58; distanceMatrix_(16,1) = 71; distanceMatrix_(16,2) = 65; distanceMatrix_(16,3) = 85; distanceMatrix_(16,4) = 149; distanceMatrix_(16,5) = 42; distanceMatrix_(16,6) = 65; distanceMatrix_(16,7) = 59; distanceMatrix_(16,8) = 47; distanceMatrix_(16,9) = 89; distanceMatrix_(16,10) = 92; distanceMatrix_(16,11) = 78; distanceMatrix_(16,12) = 81; distanceMatrix_(16,13) = 103; distanceMatrix_(16,14) = 38; distanceMatrix_(16,15) = 58; distanceMatrix_(16,16) = 0; distanceMatrix_(17,0) = 148; distanceMatrix_(17,1) = 101; distanceMatrix_(17,2) = 174; distanceMatrix_(17,3) = 181; distanceMatrix_(17,4) = 215; distanceMatrix_(17,5) = 130; distanceMatrix_(17,6) = 152; distanceMatrix_(17,7) = 184; distanceMatrix_(17,8) = 115; distanceMatrix_(17,9) = 61; distanceMatrix_(17,10) = 61; distanceMatrix_(17,11) = 110; distanceMatrix_(17,12) = 67; distanceMatrix_(17,13) = 40; distanceMatrix_(17,14) = 147; distanceMatrix_(17,15) = 177; distanceMatrix_(17,16) = 128; distanceMatrix_(17,17) = 0; distanceMatrix_(18,0) = 112; distanceMatrix_(18,1) = 77; distanceMatrix_(18,2) = 143; distanceMatrix_(18,3) = 160; distanceMatrix_(18,4) = 194; distanceMatrix_(18,5) = 99; distanceMatrix_(18,6) = 122; distanceMatrix_(18,7) = 147; distanceMatrix_(18,8) = 83; distanceMatrix_(18,9) = 33; distanceMatrix_(18,10) = 36; distanceMatrix_(18,11) = 85; distanceMatrix_(18,12) = 36; distanceMatrix_(18,13) = 22; distanceMatrix_(18,14) = 110; distanceMatrix_(18,15) = 144; distanceMatrix_(18,16) = 92; distanceMatrix_(18,17) = 37; distanceMatrix_(18,18) = 0; distanceMatrix_(19,0) = 64; distanceMatrix_(19,1) = 96; distanceMatrix_(19,2) = 133; distanceMatrix_(19,3) = 152; distanceMatrix_(19,4) = 192; distanceMatrix_(19,5) = 96; distanceMatrix_(19,6) = 121; distanceMatrix_(19,7) = 109; distanceMatrix_(19,8) = 84; distanceMatrix_(19,9) = 29; distanceMatrix_(19,10) = 32; distanceMatrix_(19,11) = 97; distanceMatrix_(19,12) = 21; 
distanceMatrix_(19,13) = 50; distanceMatrix_(19,14) = 68; distanceMatrix_(19,15) = 124; distanceMatrix_(19,16) = 69; distanceMatrix_(19,17) = 88; distanceMatrix_(19,18) = 55; distanceMatrix_(19,19) = 0; distanceMatrix_(0,1) = -112; distanceMatrix_(0,2) = -111; distanceMatrix_(0,3) = -126; distanceMatrix_(0,4) = -195; distanceMatrix_(0,5) = -91; distanceMatrix_(0,6) = -107; distanceMatrix_(0,7) = -60; distanceMatrix_(0,8) = -86; distanceMatrix_(0,9) = -94; distanceMatrix_(0,10) = -96; distanceMatrix_(0,11) = -106; distanceMatrix_(0,12) = -84; distanceMatrix_(0,13) = -113; distanceMatrix_(0,14) = -27; distanceMatrix_(0,15) = -99; distanceMatrix_(0,16) = -58; distanceMatrix_(0,17) = -148; distanceMatrix_(0,18) = -112; distanceMatrix_(0,19) = -64; distanceMatrix_(1,2) = -86; distanceMatrix_(1,3) = -96; distanceMatrix_(1,4) = -180; distanceMatrix_(1,5) = -43; distanceMatrix_(1,6) = -54; distanceMatrix_(1,7) = -125; distanceMatrix_(1,8) = -29; distanceMatrix_(1,9) = -97; distanceMatrix_(1,10) = -102; distanceMatrix_(1,11) = -26; distanceMatrix_(1,12) = -91; distanceMatrix_(1,13) = -97; distanceMatrix_(1,14) = -103; distanceMatrix_(1,15) = -110; distanceMatrix_(1,16) = -71; distanceMatrix_(1,17) = -101; distanceMatrix_(1,18) = -77; distanceMatrix_(1,19) = -96; distanceMatrix_(2,3) = -23; distanceMatrix_(2,4) = -139; distanceMatrix_(2,5) = -46; distanceMatrix_(2,6) = -42; distanceMatrix_(2,7) = -80; distanceMatrix_(2,8) = -68; distanceMatrix_(2,9) = -149; distanceMatrix_(2,10) = -153; distanceMatrix_(2,11) = -94; distanceMatrix_(2,12) = -142; distanceMatrix_(2,13) = -158; distanceMatrix_(2,14) = -91; distanceMatrix_(2,15) = -46; distanceMatrix_(2,16) = -65; distanceMatrix_(2,17) = -174; distanceMatrix_(2,18) = -143; distanceMatrix_(2,19) = -133; distanceMatrix_(3,4) = -154; distanceMatrix_(3,5) = -61; distanceMatrix_(3,6) = -45; distanceMatrix_(3,7) = -94; distanceMatrix_(3,8) = -81; distanceMatrix_(3,9) = -168; distanceMatrix_(3,10) = -172; distanceMatrix_(3,11) = -101; 
distanceMatrix_(3,12) = -160; distanceMatrix_(3,13) = -177; distanceMatrix_(3,14) = -108; distanceMatrix_(3,15) = -65; distanceMatrix_(3,16) = -85; distanceMatrix_(3,17) = -181; distanceMatrix_(3,18) = -160; distanceMatrix_(3,19) = -152; distanceMatrix_(4,5) = -154; distanceMatrix_(4,6) = -170; distanceMatrix_(4,7) = -159; distanceMatrix_(4,8) = -174; distanceMatrix_(4,9) = -198; distanceMatrix_(4,10) = -198; distanceMatrix_(4,11) = -202; distanceMatrix_(4,12) = -196; distanceMatrix_(4,13) = -205; distanceMatrix_(4,14) = -169; distanceMatrix_(4,15) = -112; distanceMatrix_(4,16) = -149; distanceMatrix_(4,17) = -215; distanceMatrix_(4,18) = -194; distanceMatrix_(4,19) = -192; distanceMatrix_(5,6) = -29; distanceMatrix_(5,7) = -87; distanceMatrix_(5,8) = -24; distanceMatrix_(5,9) = -109; distanceMatrix_(5,10) = -113; distanceMatrix_(5,11) = -53; distanceMatrix_(5,12) = -101; distanceMatrix_(5,13) = -116; distanceMatrix_(5,14) = -76; distanceMatrix_(5,15) = -68; distanceMatrix_(5,16) = -42; distanceMatrix_(5,17) = -130; distanceMatrix_(5,18) = -99; distanceMatrix_(5,19) = -96; distanceMatrix_(6,7) = -98; distanceMatrix_(6,8) = -40; distanceMatrix_(6,9) = -134; distanceMatrix_(6,10) = -138; distanceMatrix_(6,11) = -56; distanceMatrix_(6,12) = -126; distanceMatrix_(6,13) = -140; distanceMatrix_(6,14) = -93; distanceMatrix_(6,15) = -80; distanceMatrix_(6,16) = -65; distanceMatrix_(6,17) = -152; distanceMatrix_(6,18) = -122; distanceMatrix_(6,19) = -121; distanceMatrix_(7,8) = -98; distanceMatrix_(7,9) = -135; distanceMatrix_(7,10) = -138; distanceMatrix_(7,11) = -127; distanceMatrix_(7,12) = -127; distanceMatrix_(7,13) = -153; distanceMatrix_(7,14) = -42; distanceMatrix_(7,15) = -56; distanceMatrix_(7,16) = -59; distanceMatrix_(7,17) = -184; distanceMatrix_(7,18) = -147; distanceMatrix_(7,19) = -109; distanceMatrix_(8,9) = -94; distanceMatrix_(8,10) = -99; distanceMatrix_(8,11) = -32; distanceMatrix_(8,12) = -87; distanceMatrix_(8,13) = -100; distanceMatrix_(8,14) = -77; 
distanceMatrix_(8,15) = -89; distanceMatrix_(8,16) = -47; distanceMatrix_(8,17) = -115; distanceMatrix_(8,18) = -83; distanceMatrix_(8,19) = -84; distanceMatrix_(9,10) = -5; distanceMatrix_(9,11) = -102; distanceMatrix_(9,12) = -10; distanceMatrix_(9,13) = -21; distanceMatrix_(9,14) = -95; distanceMatrix_(9,15) = -142; distanceMatrix_(9,16) = -89; distanceMatrix_(9,17) = -61; distanceMatrix_(9,18) = -33; distanceMatrix_(9,19) = -29; distanceMatrix_(10,11) = -107; distanceMatrix_(10,12) = -15; distanceMatrix_(10,13) = -22; distanceMatrix_(10,14) = -98; distanceMatrix_(10,15) = -145; distanceMatrix_(10,16) = -92; distanceMatrix_(10,17) = -61; distanceMatrix_(10,18) = -36; distanceMatrix_(10,19) = -32; distanceMatrix_(11,12) = -95; distanceMatrix_(11,13) = -102; distanceMatrix_(11,14) = -103; distanceMatrix_(11,15) = -121; distanceMatrix_(11,16) = -78; distanceMatrix_(11,17) = -110; distanceMatrix_(11,18) = -85; distanceMatrix_(11,19) = -97; distanceMatrix_(12,13) = -28; distanceMatrix_(12,14) = -87; distanceMatrix_(12,15) = -135; distanceMatrix_(12,16) = -81; distanceMatrix_(12,17) = -67; distanceMatrix_(12,18) = -36; distanceMatrix_(12,19) = -21; distanceMatrix_(13,14) = -114; distanceMatrix_(13,15) = -155; distanceMatrix_(13,16) = -103; distanceMatrix_(13,17) = -40; distanceMatrix_(13,18) = -22; distanceMatrix_(13,19) = -50; distanceMatrix_(14,15) = -74; distanceMatrix_(14,16) = -38; distanceMatrix_(14,17) = -147; distanceMatrix_(14,18) = -110; distanceMatrix_(14,19) = -68; distanceMatrix_(15,16) = -58; distanceMatrix_(15,17) = -177; distanceMatrix_(15,18) = -144; distanceMatrix_(15,19) = -124; distanceMatrix_(16,17) = -128; distanceMatrix_(16,18) = -92; distanceMatrix_(16,19) = -69; distanceMatrix_(17,18) = -37; distanceMatrix_(17,19) = -88; distanceMatrix_(18,19) = -55; //data<-data.frame(composition=aaindex[["GRAR740101"]]$I,polarity=aaindex[["GRAR740102"]]$I,volume=aaindex[["GRAR740103"]]$I) //library(ade4) //pca<-dudi.pca(data) // //plot(pca$li[,1:2],type="n") 
//text(pca$li[,1:2],rownames(data)) // //s.corcircle(pca$co) //layout(matrix(1:3,nrow=1)) //a1<-pca$li[,1]; names(a1)<-rownames(data); dotchart(sort(a1)) //a2<-pca$li[,2]; names(a2)<-rownames(data); dotchart(sort(a2)) //a3<-pca$li[,3]; names(a3)<-rownames(data); dotchart(sort(a3)) // //x<-pca$li[,1] //m<-matrix(nrow=20, ncol=20) //unlink("tmp.cpp") //for(i in 1:length(x)) // for(j in 1:length(x)) // { // m[i,j]<-sign(x[j] - x[i]) // cat("signMatrix_(",(i-1),", ",(j-1),") = ", m[i,j], ";\n",sep="",file="tmp.cpp",append=TRUE) // } signMatrix_(0, 0) = 0; signMatrix_(0, 1) = 1; signMatrix_(0, 2) = -1; signMatrix_(0, 3) = -1; signMatrix_(0, 4) = -1; signMatrix_(0, 5) = -1; signMatrix_(0, 6) = -1; signMatrix_(0, 7) = -1; signMatrix_(0, 8) = -1; signMatrix_(0, 9) = 1; signMatrix_(0, 10) = 1; signMatrix_(0, 11) = 1; signMatrix_(0, 12) = 1; signMatrix_(0, 13) = 1; signMatrix_(0, 14) = -1; signMatrix_(0, 15) = -1; signMatrix_(0, 16) = -1; signMatrix_(0, 17) = 1; signMatrix_(0, 18) = 1; signMatrix_(0, 19) = 1; signMatrix_(1, 0) = -1; signMatrix_(1, 1) = 0; signMatrix_(1, 2) = -1; signMatrix_(1, 3) = -1; signMatrix_(1, 4) = -1; signMatrix_(1, 5) = -1; signMatrix_(1, 6) = -1; signMatrix_(1, 7) = -1; signMatrix_(1, 8) = -1; signMatrix_(1, 9) = 1; signMatrix_(1, 10) = 1; signMatrix_(1, 11) = 1; signMatrix_(1, 12) = 1; signMatrix_(1, 13) = 1; signMatrix_(1, 14) = -1; signMatrix_(1, 15) = -1; signMatrix_(1, 16) = -1; signMatrix_(1, 17) = 1; signMatrix_(1, 18) = 1; signMatrix_(1, 19) = 1; signMatrix_(2, 0) = 1; signMatrix_(2, 1) = 1; signMatrix_(2, 2) = 0; signMatrix_(2, 3) = -1; signMatrix_(2, 4) = -1; signMatrix_(2, 5) = 1; signMatrix_(2, 6) = 1; signMatrix_(2, 7) = 1; signMatrix_(2, 8) = 1; signMatrix_(2, 9) = 1; signMatrix_(2, 10) = 1; signMatrix_(2, 11) = 1; signMatrix_(2, 12) = 1; signMatrix_(2, 13) = 1; signMatrix_(2, 14) = 1; signMatrix_(2, 15) = 1; signMatrix_(2, 16) = 1; signMatrix_(2, 17) = 1; signMatrix_(2, 18) = 1; signMatrix_(2, 19) = 1; signMatrix_(3, 0) = 1; 
signMatrix_(3, 1) = 1; signMatrix_(3, 2) = 1; signMatrix_(3, 3) = 0; signMatrix_(3, 4) = 1; signMatrix_(3, 5) = 1; signMatrix_(3, 6) = 1; signMatrix_(3, 7) = 1; signMatrix_(3, 8) = 1; signMatrix_(3, 9) = 1; signMatrix_(3, 10) = 1; signMatrix_(3, 11) = 1; signMatrix_(3, 12) = 1; signMatrix_(3, 13) = 1; signMatrix_(3, 14) = 1; signMatrix_(3, 15) = 1; signMatrix_(3, 16) = 1; signMatrix_(3, 17) = 1; signMatrix_(3, 18) = 1; signMatrix_(3, 19) = 1; signMatrix_(4, 0) = 1; signMatrix_(4, 1) = 1; signMatrix_(4, 2) = 1; signMatrix_(4, 3) = -1; signMatrix_(4, 4) = 0; signMatrix_(4, 5) = 1; signMatrix_(4, 6) = 1; signMatrix_(4, 7) = 1; signMatrix_(4, 8) = 1; signMatrix_(4, 9) = 1; signMatrix_(4, 10) = 1; signMatrix_(4, 11) = 1; signMatrix_(4, 12) = 1; signMatrix_(4, 13) = 1; signMatrix_(4, 14) = 1; signMatrix_(4, 15) = 1; signMatrix_(4, 16) = 1; signMatrix_(4, 17) = 1; signMatrix_(4, 18) = 1; signMatrix_(4, 19) = 1; signMatrix_(5, 0) = 1; signMatrix_(5, 1) = 1; signMatrix_(5, 2) = -1; signMatrix_(5, 3) = -1; signMatrix_(5, 4) = -1; signMatrix_(5, 5) = 0; signMatrix_(5, 6) = -1; signMatrix_(5, 7) = -1; signMatrix_(5, 8) = 1; signMatrix_(5, 9) = 1; signMatrix_(5, 10) = 1; signMatrix_(5, 11) = 1; signMatrix_(5, 12) = 1; signMatrix_(5, 13) = 1; signMatrix_(5, 14) = 1; signMatrix_(5, 15) = -1; signMatrix_(5, 16) = 1; signMatrix_(5, 17) = 1; signMatrix_(5, 18) = 1; signMatrix_(5, 19) = 1; signMatrix_(6, 0) = 1; signMatrix_(6, 1) = 1; signMatrix_(6, 2) = -1; signMatrix_(6, 3) = -1; signMatrix_(6, 4) = -1; signMatrix_(6, 5) = 1; signMatrix_(6, 6) = 0; signMatrix_(6, 7) = -1; signMatrix_(6, 8) = 1; signMatrix_(6, 9) = 1; signMatrix_(6, 10) = 1; signMatrix_(6, 11) = 1; signMatrix_(6, 12) = 1; signMatrix_(6, 13) = 1; signMatrix_(6, 14) = 1; signMatrix_(6, 15) = -1; signMatrix_(6, 16) = 1; signMatrix_(6, 17) = 1; signMatrix_(6, 18) = 1; signMatrix_(6, 19) = 1; signMatrix_(7, 0) = 1; signMatrix_(7, 1) = 1; signMatrix_(7, 2) = -1; signMatrix_(7, 3) = -1; signMatrix_(7, 4) = -1; 
signMatrix_(7, 5) = 1; signMatrix_(7, 6) = 1; signMatrix_(7, 7) = 0; signMatrix_(7, 8) = 1; signMatrix_(7, 9) = 1; signMatrix_(7, 10) = 1; signMatrix_(7, 11) = 1; signMatrix_(7, 12) = 1; signMatrix_(7, 13) = 1; signMatrix_(7, 14) = 1; signMatrix_(7, 15) = -1; signMatrix_(7, 16) = 1; signMatrix_(7, 17) = 1; signMatrix_(7, 18) = 1; signMatrix_(7, 19) = 1; signMatrix_(8, 0) = 1; signMatrix_(8, 1) = 1; signMatrix_(8, 2) = -1; signMatrix_(8, 3) = -1; signMatrix_(8, 4) = -1; signMatrix_(8, 5) = -1; signMatrix_(8, 6) = -1; signMatrix_(8, 7) = -1; signMatrix_(8, 8) = 0; signMatrix_(8, 9) = 1; signMatrix_(8, 10) = 1; signMatrix_(8, 11) = 1; signMatrix_(8, 12) = 1; signMatrix_(8, 13) = 1; signMatrix_(8, 14) = -1; signMatrix_(8, 15) = -1; signMatrix_(8, 16) = -1; signMatrix_(8, 17) = 1; signMatrix_(8, 18) = 1; signMatrix_(8, 19) = 1; signMatrix_(9, 0) = -1; signMatrix_(9, 1) = -1; signMatrix_(9, 2) = -1; signMatrix_(9, 3) = -1; signMatrix_(9, 4) = -1; signMatrix_(9, 5) = -1; signMatrix_(9, 6) = -1; signMatrix_(9, 7) = -1; signMatrix_(9, 8) = -1; signMatrix_(9, 9) = 0; signMatrix_(9, 10) = 1; signMatrix_(9, 11) = -1; signMatrix_(9, 12) = -1; signMatrix_(9, 13) = 1; signMatrix_(9, 14) = -1; signMatrix_(9, 15) = -1; signMatrix_(9, 16) = -1; signMatrix_(9, 17) = 1; signMatrix_(9, 18) = -1; signMatrix_(9, 19) = -1; signMatrix_(10, 0) = -1; signMatrix_(10, 1) = -1; signMatrix_(10, 2) = -1; signMatrix_(10, 3) = -1; signMatrix_(10, 4) = -1; signMatrix_(10, 5) = -1; signMatrix_(10, 6) = -1; signMatrix_(10, 7) = -1; signMatrix_(10, 8) = -1; signMatrix_(10, 9) = -1; signMatrix_(10, 10) = 0; signMatrix_(10, 11) = -1; signMatrix_(10, 12) = -1; signMatrix_(10, 13) = 1; signMatrix_(10, 14) = -1; signMatrix_(10, 15) = -1; signMatrix_(10, 16) = -1; signMatrix_(10, 17) = 1; signMatrix_(10, 18) = -1; signMatrix_(10, 19) = -1; signMatrix_(11, 0) = -1; signMatrix_(11, 1) = -1; signMatrix_(11, 2) = -1; signMatrix_(11, 3) = -1; signMatrix_(11, 4) = -1; signMatrix_(11, 5) = -1; signMatrix_(11, 6) = 
-1; signMatrix_(11, 7) = -1; signMatrix_(11, 8) = -1; signMatrix_(11, 9) = 1; signMatrix_(11, 10) = 1; signMatrix_(11, 11) = 0; signMatrix_(11, 12) = 1; signMatrix_(11, 13) = 1; signMatrix_(11, 14) = -1; signMatrix_(11, 15) = -1; signMatrix_(11, 16) = -1; signMatrix_(11, 17) = 1; signMatrix_(11, 18) = 1; signMatrix_(11, 19) = 1; signMatrix_(12, 0) = -1; signMatrix_(12, 1) = -1; signMatrix_(12, 2) = -1; signMatrix_(12, 3) = -1; signMatrix_(12, 4) = -1; signMatrix_(12, 5) = -1; signMatrix_(12, 6) = -1; signMatrix_(12, 7) = -1; signMatrix_(12, 8) = -1; signMatrix_(12, 9) = 1; signMatrix_(12, 10) = 1; signMatrix_(12, 11) = -1; signMatrix_(12, 12) = 0; signMatrix_(12, 13) = 1; signMatrix_(12, 14) = -1; signMatrix_(12, 15) = -1; signMatrix_(12, 16) = -1; signMatrix_(12, 17) = 1; signMatrix_(12, 18) = 1; signMatrix_(12, 19) = -1; signMatrix_(13, 0) = -1; signMatrix_(13, 1) = -1; signMatrix_(13, 2) = -1; signMatrix_(13, 3) = -1; signMatrix_(13, 4) = -1; signMatrix_(13, 5) = -1; signMatrix_(13, 6) = -1; signMatrix_(13, 7) = -1; signMatrix_(13, 8) = -1; signMatrix_(13, 9) = -1; signMatrix_(13, 10) = -1; signMatrix_(13, 11) = -1; signMatrix_(13, 12) = -1; signMatrix_(13, 13) = 0; signMatrix_(13, 14) = -1; signMatrix_(13, 15) = -1; signMatrix_(13, 16) = -1; signMatrix_(13, 17) = 1; signMatrix_(13, 18) = -1; signMatrix_(13, 19) = -1; signMatrix_(14, 0) = 1; signMatrix_(14, 1) = 1; signMatrix_(14, 2) = -1; signMatrix_(14, 3) = -1; signMatrix_(14, 4) = -1; signMatrix_(14, 5) = -1; signMatrix_(14, 6) = -1; signMatrix_(14, 7) = -1; signMatrix_(14, 8) = 1; signMatrix_(14, 9) = 1; signMatrix_(14, 10) = 1; signMatrix_(14, 11) = 1; signMatrix_(14, 12) = 1; signMatrix_(14, 13) = 1; signMatrix_(14, 14) = 0; signMatrix_(14, 15) = -1; signMatrix_(14, 16) = -1; signMatrix_(14, 17) = 1; signMatrix_(14, 18) = 1; signMatrix_(14, 19) = 1; signMatrix_(15, 0) = 1; signMatrix_(15, 1) = 1; signMatrix_(15, 2) = -1; signMatrix_(15, 3) = -1; signMatrix_(15, 4) = -1; signMatrix_(15, 5) = 1; 
signMatrix_(15, 6) = 1; signMatrix_(15, 7) = 1; signMatrix_(15, 8) = 1; signMatrix_(15, 9) = 1; signMatrix_(15, 10) = 1; signMatrix_(15, 11) = 1; signMatrix_(15, 12) = 1; signMatrix_(15, 13) = 1; signMatrix_(15, 14) = 1; signMatrix_(15, 15) = 0; signMatrix_(15, 16) = 1; signMatrix_(15, 17) = 1; signMatrix_(15, 18) = 1; signMatrix_(15, 19) = 1; signMatrix_(16, 0) = 1; signMatrix_(16, 1) = 1; signMatrix_(16, 2) = -1; signMatrix_(16, 3) = -1; signMatrix_(16, 4) = -1; signMatrix_(16, 5) = -1; signMatrix_(16, 6) = -1; signMatrix_(16, 7) = -1; signMatrix_(16, 8) = 1; signMatrix_(16, 9) = 1; signMatrix_(16, 10) = 1; signMatrix_(16, 11) = 1; signMatrix_(16, 12) = 1; signMatrix_(16, 13) = 1; signMatrix_(16, 14) = 1; signMatrix_(16, 15) = -1; signMatrix_(16, 16) = 0; signMatrix_(16, 17) = 1; signMatrix_(16, 18) = 1; signMatrix_(16, 19) = 1; signMatrix_(17, 0) = -1; signMatrix_(17, 1) = -1; signMatrix_(17, 2) = -1; signMatrix_(17, 3) = -1; signMatrix_(17, 4) = -1; signMatrix_(17, 5) = -1; signMatrix_(17, 6) = -1; signMatrix_(17, 7) = -1; signMatrix_(17, 8) = -1; signMatrix_(17, 9) = -1; signMatrix_(17, 10) = -1; signMatrix_(17, 11) = -1; signMatrix_(17, 12) = -1; signMatrix_(17, 13) = -1; signMatrix_(17, 14) = -1; signMatrix_(17, 15) = -1; signMatrix_(17, 16) = -1; signMatrix_(17, 17) = 0; signMatrix_(17, 18) = -1; signMatrix_(17, 19) = -1; signMatrix_(18, 0) = -1; signMatrix_(18, 1) = -1; signMatrix_(18, 2) = -1; signMatrix_(18, 3) = -1; signMatrix_(18, 4) = -1; signMatrix_(18, 5) = -1; signMatrix_(18, 6) = -1; signMatrix_(18, 7) = -1; signMatrix_(18, 8) = -1; signMatrix_(18, 9) = 1; signMatrix_(18, 10) = 1; signMatrix_(18, 11) = -1; signMatrix_(18, 12) = -1; signMatrix_(18, 13) = 1; signMatrix_(18, 14) = -1; signMatrix_(18, 15) = -1; signMatrix_(18, 16) = -1; signMatrix_(18, 17) = 1; signMatrix_(18, 18) = 0; signMatrix_(18, 19) = -1; signMatrix_(19, 0) = -1; signMatrix_(19, 1) = -1; signMatrix_(19, 2) = -1; signMatrix_(19, 3) = -1; signMatrix_(19, 4) = -1; signMatrix_(19, 
5) = -1; signMatrix_(19, 6) = -1; signMatrix_(19, 7) = -1; signMatrix_(19, 8) = -1; signMatrix_(19, 9) = 1; signMatrix_(19, 10) = 1; signMatrix_(19, 11) = -1; signMatrix_(19, 12) = 1; signMatrix_(19, 13) = 1; signMatrix_(19, 14) = -1; signMatrix_(19, 15) = -1; signMatrix_(19, 16) = -1; signMatrix_(19, 17) = 1; signMatrix_(19, 18) = 1; signMatrix_(19, 19) = 0; bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/BLOSUM50.h000644 000000 000000 00000006237 12147656566 021160 0ustar00rootroot000000 000000 // // File: BLOSUM50.h // Created by: Julien Dutheil // Created on: Tue Jan 18 10:28 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BLOSUM50_H_ #define _BLOSUM50_H_ // from the STL: #include #include "AlphabetIndex2.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief BLOSUM 50 Substitution Matrix. * * Reference: * Henikoff, S. and Henikoff, J.G. * Amino acid substitution matrices from protein blocks * Proc. Natl. Acad. Sci. USA 89, 10915-10919 (1992) * * Data from AAIndex2 database, Accession Number HENS920104. */ class BLOSUM50 : public virtual AlphabetIndex2 { private: LinearMatrix distanceMatrix_; const ProteicAlphabet* alpha_; public: BLOSUM50(); BLOSUM50(const BLOSUM50& blosum) : distanceMatrix_(blosum.distanceMatrix_), alpha_(blosum.alpha_) {} BLOSUM50& operator=(const BLOSUM50& blosum) { distanceMatrix_ = blosum.distanceMatrix_; alpha_ = blosum.alpha_; return *this; } virtual ~BLOSUM50() {} public: /** * @name Methods from the AlphabetIndex2 interface. * * @{ */ double getIndex(int state1, int state2) const throw (BadIntException); double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException); const Alphabet* getAlphabet() const { return alpha_; } BLOSUM50* clone() const { return new BLOSUM50(); } LinearMatrix* getIndexMatrix() const; /** @} */ }; } // end of namespace bpp. 
#endif // _BLOSUM50_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h000644 000000 000000 00000006756 12147656566 024414 0ustar00rootroot000000 000000 // // File: DefaultNucleotideScore.h // Created by: Julien Dutheil // Created on: Fri Jan 19 10:30 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _DEFAULTNUCLEOTIDESCORE_H_ #define _DEFAULTNUCLEOTIDESCORE_H_ // from the STL: #include #include "AlphabetIndex2.h" #include "../Alphabet/NucleicAlphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief Default Substitution Matrix for nucleotide alignments. */ class DefaultNucleotideScore : public virtual AlphabetIndex2 { private: LinearMatrix distanceMatrix_; const NucleicAlphabet* alpha_; public: /** * @brief Build a new DefaultNucleotideScore object. * * @param alphabet The alphabet to use. */ DefaultNucleotideScore(const NucleicAlphabet* alphabet); DefaultNucleotideScore(const DefaultNucleotideScore& dns) : distanceMatrix_(dns.distanceMatrix_), alpha_(dns.alpha_) {} DefaultNucleotideScore& operator=(const DefaultNucleotideScore& dns) { distanceMatrix_ = dns.distanceMatrix_; alpha_ = dns.alpha_; return *this; } virtual ~DefaultNucleotideScore() {} public: /** * @name Methods from the AlphabetIndex2 interface. * * @{ */ /** * @copydoc bpp::AlphabetIndex2::getIndex() * * If states are unresolved, takes the best score of all possible matches * and divides it by the number of different states. * @author Sylvain Gaillard */ double getIndex(int state1, int state2) const throw (BadIntException); double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException); const Alphabet* getAlphabet() const { return alpha_; } DefaultNucleotideScore* clone() const { return new DefaultNucleotideScore(*this); } LinearMatrix* getIndexMatrix() const; /** @} */ }; } // end of namespace bpp. #endif // _DEFAULTNUCLEOTIDESCORE_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h000644 000000 000000 00000007116 12147656566 022115 0ustar00rootroot000000 000000 // // File: AASEA1030Index.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AASEA1030INDEX_H_ #define _AASEA1030INDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Percentage of amino acids having a Solvent Exposed Area between 10 and 30 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm. 
* * */ class AASEA1030Index : public AlphabetIndex1 { private: std::vector sea1030_; public: AASEA1030Index() : sea1030_() { sea1030_.resize(20); sea1030_[ 0] = 0.17; // A sea1030_[ 1] = 0.11; // R sea1030_[ 2] = 0.08; // N sea1030_[ 3] = 0.10; // D sea1030_[ 4] = 0.14; // C sea1030_[ 5] = 0.09; // Q sea1030_[ 6] = 0.03; // E sea1030_[ 7] = 0.13; // G sea1030_[ 8] = 0.15; // H sea1030_[ 9] = 0.14; // I sea1030_[10] = 0.10; // L sea1030_[11] = 0.05; // K sea1030_[12] = 0.36; // M sea1030_[13] = 0.16; // F sea1030_[14] = 0.09; // P sea1030_[15] = 0.10; // S sea1030_[16] = 0.13; // T sea1030_[17] = 0.07; // W sea1030_[18] = 0.13; // Y sea1030_[19] = 0.10; // V } virtual ~AASEA1030Index() {} AASEA1030Index* clone() const { return new AASEA1030Index(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AASEA1030Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return sea1030_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return sea1030_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(sea1030_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AASEA1030INDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h000644 000000 000000 00000006240 12147656566 022576 0ustar00rootroot000000 000000 // // File: AlphabetIndex2.h // Created by: Julien Dutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALPHABETINDEX2_H_ #define _ALPHABETINDEX2_H_ #include "../Alphabet/Alphabet.h" #include #include // From the STL: #include namespace bpp { /** * @brief Two dimensionnal alphabet index interface. * * Derivatives of this interface implement distances between two states. */ class AlphabetIndex2 : public Clonable { public: AlphabetIndex2() {} virtual ~AlphabetIndex2() {} public: /** * @brief Get the index associated to a pair of states. * * @param state1 First state to consider, as a int value. * @param state2 Second state to consider, as a int value. * @return The index associated to the pair of states */ virtual double getIndex(int state1, int state2) const = 0; /** * @brief Get the index associated to a pair of states. 
* * @param state1 First state to consider, as a string value. * @param state2 Second state to consider, as a string value. * @return The index associated to the pair of states */ virtual double getIndex(const std::string& state1, const std::string& state2) const = 0; /** * @brief Get the alphabet associated to this index. * * @return Alphabet The alphabet associated to this index. */ virtual const Alphabet* getAlphabet() const = 0; /** * @return A matrix object with all indices. */ virtual Matrix* getIndexMatrix() const = 0; }; } // end of namespace bpp. #endif // _ALPHABETINDEX2_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h000644 000000 000000 00000010323 12147656566 026250 0ustar00rootroot000000 000000 // // File: AAChenGuHuangHydrophobicityIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AACHENGUHUANGHYDROPHOBICITYINDEX_H_ #define _AACHENGUHUANGHYDROPHOBICITYINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Hydrophobicity of each amino acid, according to Table 1 in Chen, Gu and Huang, BMC Bioinformatics 2006. * * @code * Database: AAChenGuHuangHydrophobicity * Entry: CHENH06 * * H FAUJ880111 * D Hydrophobicity (Chen et al., 2006) * R PMCID:PMC1780123 PMID:17217506 * A Hang Chen, Fei Gu, and Zhengge Huang. * T Improved Chou-Fasman method for protein secondary structure prediction * J BMC Bioinformatics. 2006; 7(Suppl 4): S14. 
(2006) * // * @endcode * */ class AAChenGuHuangHydrophobicityIndex : public AlphabetIndex1 { private: std::vector hydrophobicity_; public: AAChenGuHuangHydrophobicityIndex() : hydrophobicity_() { hydrophobicity_.resize(20); hydrophobicity_[ 0] = 0.87; // A hydrophobicity_[ 1] = 0.85; // R hydrophobicity_[ 2] = 0.09; // N hydrophobicity_[ 3] = 0.66; // D hydrophobicity_[ 4] = 1.52; // C hydrophobicity_[ 5] = 0.00; // Q hydrophobicity_[ 6] = 0.67; // E hydrophobicity_[ 7] = 0.00; // G hydrophobicity_[ 8] = 0.87; // H hydrophobicity_[ 9] = 3.15; // I hydrophobicity_[10] = 2.17; // L hydrophobicity_[11] = 1.64; // K hydrophobicity_[12] = 1.67; // M hydrophobicity_[13] = 2.87; // F hydrophobicity_[14] = 2.77; // P hydrophobicity_[15] = 0.07; // S hydrophobicity_[16] = 0.07; // T hydrophobicity_[17] = 3.77; // W hydrophobicity_[18] = 2.76; // Y hydrophobicity_[19] = 1.87; // V } virtual ~AAChenGuHuangHydrophobicityIndex() {} AAChenGuHuangHydrophobicityIndex* clone() const { return new AAChenGuHuangHydrophobicityIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAChenGuHuangHydrophobicityIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return hydrophobicity_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return hydrophobicity_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(hydrophobicity_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AACHENGUHUANGHYDROPHOBICITYINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h000644 000000 000000 00000007274 12147656566 024543 0ustar00rootroot000000 000000 // // File: MiyataAAChemicalDistance.h // Created by: jdutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) Julien.Dutheil@univ-montp2.fr This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _MIYATAAACHEMICALDISTANCE_H_ #define _MIYATAAACHEMICALDISTANCE_H_ // from the STL: #include #include "AlphabetIndex2.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief Miyata et al. (1979) Amino-Acid chemical distance. * * Two kinds of matrix can be built: * - a symmetric one, with \f$I_{i,j} = I_{j,i}\f$, * - or a non-symmetric one, with \f$I_{i,j} = -I_{j,i}\f$.
* * Reference: * Miyata, T., Miyazawa, S. and Yasunaga, T. * Two types of amino acid substitutions in protein evolution * J. Mol. Evol. 12, 219-236 (1979) * * Data from AAIndex2 database, Accession Number MIYT790101. */ class MiyataAAChemicalDistance : public virtual AlphabetIndex2 { private: LinearMatrix distanceMatrix_; const ProteicAlphabet* alpha_; bool sym_; public: MiyataAAChemicalDistance(); MiyataAAChemicalDistance(const MiyataAAChemicalDistance& md) : distanceMatrix_(md.distanceMatrix_), alpha_(md.alpha_), sym_(md.sym_) {} MiyataAAChemicalDistance& operator=(const MiyataAAChemicalDistance& md) { distanceMatrix_ = md.distanceMatrix_; alpha_ = md.alpha_; sym_ = md.sym_; return *this; } virtual ~MiyataAAChemicalDistance() {} MiyataAAChemicalDistance* clone() const { return new MiyataAAChemicalDistance(); } public: /** * @name Methods from the AlphabetIndex2 interface. * * @{ */ double getIndex(int state1, int state2) const throw (BadIntException); double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException); const Alphabet* getAlphabet() const { return alpha_; } Matrix* getIndexMatrix() const; /** @} */ public: void setSymmetric(bool yn) { sym_ = yn; } bool isSymmetric() const { return sym_; } }; } // end of namespace bpp. #endif // _MIYATAAACHEMICALDISTANCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h000644 000000 000000 00000005733 12147656566 022603 0ustar00rootroot000000 000000 // // File: AlphabetIndex1.h // Created by: Julien Dutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) Julien.Dutheil@univ-montp2.fr This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALPHABETINDEX1_H_ #define _ALPHABETINDEX1_H_ #include "../Alphabet/Alphabet.h" #include // From the STL: #include namespace bpp { /** * @brief One dimensionnal alphabet index interface. * * Derivatives of this interface implement properties for a single state. */ class AlphabetIndex1 : public virtual Clonable { public: AlphabetIndex1() {} virtual ~AlphabetIndex1() {} public: /** * @brief Get the index associated to a state. * * @param state The state to consider, as a int value. * @return The index associated to the state */ virtual double getIndex(int state) const = 0; /** * @brief Get the index associated to a state. * * @param state The state to consider, as a string value. 
* @return The index associated to the state */ virtual double getIndex(const std::string& state) const = 0; /** * @brief Get the alphabet associated to this index. * * @return Alphabet The alphabet associated to this index. */ virtual const Alphabet* getAlphabet() const = 0; /** * @return A vector object with all indices. */ virtual std::vector* getIndexVector() const = 0; }; } // end of namespace bpp. #endif // _ALPHABETINDEX1_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAMassIndex.h000644 000000 000000 00000006651 12147656566 022107 0ustar00rootroot000000 000000 // // File: AAMassIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AAMASSINDEX_H_ #define _AAMASSINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Mass (dalton) of each amino acid, according to http://www.imb-jena.de/IMAGE_AA.html. * * */ class AAMassIndex : public AlphabetIndex1 { private: std::vector mass_; public: AAMassIndex() : mass_() { mass_.resize(20); mass_[ 0] = 71.09; // A mass_[ 1] = 156.19; // R mass_[ 2] = 114.11; // N mass_[ 3] = 115.09; // D mass_[ 4] = 103.15; // C mass_[ 5] = 128.14; // Q mass_[ 6] = 129.12; // E mass_[ 7] = 57.05; // G mass_[ 8] = 137.14; // H mass_[ 9] = 113.16; // I mass_[10] = 113.16; // L mass_[11] = 128.17; // K mass_[12] = 131.19; // M mass_[13] = 147.18; // F mass_[14] = 97.12; // P mass_[15] = 87.08; // S mass_[16] = 101.11; // T mass_[17] = 186.12; // W mass_[18] = 163.18; // Y mass_[19] = 99.14; // V } virtual ~AAMassIndex() {} AAMassIndex* clone() const { return new AAMassIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAMassIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return mass_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return mass_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(mass_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _AAMASSINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h000644 000000 000000 00000007151 12147656566 022406 0ustar00rootroot000000 000000 // // File: AASEAInf10Index.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _AASEAINF10INDEX_H_ #define _AASEAINF10INDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Percentage of amino acids having a Solvent Exposed Area below 10 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm. * * */ class AASEAInf10Index : public AlphabetIndex1 { private: std::vector seaInf10_; public: AASEAInf10Index() : seaInf10_() { seaInf10_.resize(20); seaInf10_[ 0] = 0.35; // A seaInf10_[ 1] = 0.05; // R seaInf10_[ 2] = 0.10; // N seaInf10_[ 3] = 0.09; // D seaInf10_[ 4] = 0.54; // C seaInf10_[ 5] = 0.10; // Q seaInf10_[ 6] = 0.04; // E seaInf10_[ 7] = 0.36; // G seaInf10_[ 8] = 0.19; // H seaInf10_[ 9] = 0.47; // I seaInf10_[10] = 0.49; // L seaInf10_[11] = 0.02; // K seaInf10_[12] = 0.20; // M seaInf10_[13] = 0.42; // F seaInf10_[14] = 0.13; // P seaInf10_[15] = 0.20; // S seaInf10_[16] = 0.16; // T seaInf10_[17] = 0.44; // W seaInf10_[18] = 0.20; // Y seaInf10_[19] = 0.50; // V } virtual ~AASEAInf10Index() {} AASEAInf10Index* clone() const { return new AASEAInf10Index(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AASEAInf10Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return seaInf10_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return seaInf10_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(seaInf10_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. 
#endif // _AASEAINF10INDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp000644 000000 000000 00000006367 12147656566 022726 0ustar00rootroot000000 000000 // // File: AAIndex2Entry.cpp // Created by: Julien Dutheil // Created on: Fri Jan 19 17:07 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AAIndex2Entry.h" #include "../Alphabet/AlphabetTools.h" using namespace bpp; using namespace std; #include #include #include AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException) : property_(20, 20), alpha_(&AlphabetTools::PROTEIN_ALPHABET) { // Parse entry: string line; bool ok = false; bool diag = false; do { line = FileTools::getNextLine(input); if (line[0] == 'M') { for (unsigned int i = 0; i < 20; i++) { line = FileTools::getNextLine(input); StringTokenizer st1(line, " "); if (i == 0 && st1.numberOfRemainingTokens() == 1) { // Lower triangle only: diag = true; } // Amino acids are in the same order in the AAIndex1 database than in the ProteicAlphabet class: if (diag) { if (st1.numberOfRemainingTokens() != i + 1) break; for (unsigned int j = 0; j <= i; j++) { property_(i, j) = TextTools::toDouble(st1.nextToken()); } } else { if (st1.numberOfRemainingTokens() != 20) break; for (unsigned int j = 0; j < 20; j++) { property_(i, j) = TextTools::toDouble(st1.nextToken()); } } } // Jump to next entry... FileTools::getNextLine(input); ok = true; } } while (!ok); if (!ok) throw IOException("AAIndex2Entry: invalid AAIndex2 entry."); } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h000644 000000 000000 00000012672 12147656566 025056 0ustar00rootroot000000 000000 // // File: GranthamAAChemicalDistance.h // Created by: Julien Dutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _GRANTHAMAACHEMICALDISTANCE_H_ #define _GRANTHAMAACHEMICALDISTANCE_H_ // from the STL: #include #include "AlphabetIndex2.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetExceptions.h" #include #include namespace bpp { /** * @brief Grantham (1974) Amino-Acid chemical distance. * * Two kinds of matrix can be built: * - a symmetric one, with \f$I_{i,j} = I_{i,j}\f$, * - or a non-symmetric one, with \f$I_{i,j} = -I_{i,j}\f$. * In the second case, which one of the two entries between \f$I_{i,j}\f$ and \f$I_{i,j}\f$ is positive is arbitrary by default. * It is also possible to use the coordinate on a principal component analysis between the elementary propoerties of the distance instead (setPC1Sign(true)). 
* The following R code was use in order to get those signs: * @code * library(seqinr) * data(aaindex) * data<-data.frame(composition=aaindex[["GRAR740101"]]$I, * polarity=aaindex[["GRAR740102"]]$I, * volume=aaindex[["GRAR740103"]]$I) * library(ade4) * pca<-dudi.pca(data) * * plot(pca$li[, 1:2], type="n") * text(pca$li[, 1:2], rownames(data)) * * s.corcircle(pca$co) * layout(matrix(1:3,nrow=1)) * a1<-pca$li[,1]; names(a1)<-rownames(data); dotchart(sort(a1)) * a2<-pca$li[,2]; names(a2)<-rownames(data); dotchart(sort(a2)) * a3<-pca$li[,3]; names(a3)<-rownames(data); dotchart(sort(a3)) * * x<-pca$li[,1] #Contains the coordinates on the first axis. * m<-matrix(nrow=20, ncol=20) * for(i in 1:length(x)) * for(j in 1:length(x)) * m[i,j]<-sign(x[j] - x[i]) * * @endcode * * Reference: * Grantham, R. * Amino acid difference formula to help explain protein evolution * Science 185, 862-864 (1974) * * Data from AAIndex2 database, Accession Number GRAR740104. */ class GranthamAAChemicalDistance : public virtual AlphabetIndex2 { private: LinearMatrix distanceMatrix_; LinearMatrix signMatrix_; const ProteicAlphabet* alpha_; short int sign_; public: GranthamAAChemicalDistance(); GranthamAAChemicalDistance(const GranthamAAChemicalDistance& gd) : distanceMatrix_(gd.distanceMatrix_), signMatrix_(gd.signMatrix_), alpha_(gd.alpha_), sign_(gd.sign_) {} GranthamAAChemicalDistance& operator=(const GranthamAAChemicalDistance& gd) { distanceMatrix_ = gd.distanceMatrix_; signMatrix_ = gd.signMatrix_; alpha_ = gd.alpha_; sign_ = gd.sign_; return *this; } virtual ~GranthamAAChemicalDistance(); public: /** * @name Methods from the AlphabetIndex2 interface. 
* * @{ */ double getIndex(int state1, int state2) const throw (BadIntException); double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException); const Alphabet* getAlphabet() const { return alpha_; } GranthamAAChemicalDistance* clone() const { return new GranthamAAChemicalDistance(); } Matrix* getIndexMatrix() const; /** @} */ public: void setSymmetric(bool yn) { sign_ = (yn ? SIGN_NONE : SIGN_ARBITRARY); } bool isSymmetric() const { return sign_ == SIGN_NONE; } /** * @brief The sign of the distance is computed using the coordinate on the first axis * of a principal component analysis with the 3 elementary properties (Volume, Polarity, Composition). * Otherwise, use the default arbitrary sign. Using this option will lead isSymmetric to return false. * * @param yn Tell is the PC1-based sign should be used instead of the arbitrary one. */ void setPC1Sign(bool yn) { sign_ = (yn ? SIGN_PC1 : SIGN_ARBITRARY); } static short int SIGN_ARBITRARY; static short int SIGN_PC1; static short int SIGN_NONE; }; } // end of namespace bpp. #endif // _GRANTHAMAACHEMICALDISTANCE_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h000644 000000 000000 00000006745 12147656566 022600 0ustar00rootroot000000 000000 // // File: AASurfaceIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _AASURFACEINDEX_H_ #define _AASURFACEINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief Surface (Angström^2) of each amino acid, according to http://www.imb-jena.de/IMAGE_AA.html * * */ class AASurfaceIndex : public AlphabetIndex1 { private: std::vector surface_; public: AASurfaceIndex() : surface_() { surface_.resize(20); surface_[ 0] = 115; // A surface_[ 1] = 225; // R surface_[ 2] = 160; // N surface_[ 3] = 150; // D surface_[ 4] = 135; // C surface_[ 5] = 180; // Q surface_[ 6] = 190; // E surface_[ 7] = 75; // G surface_[ 8] = 195; // H surface_[ 9] = 175; // I surface_[10] = 170; // L surface_[11] = 200; // K surface_[12] = 185; // M surface_[13] = 210; // F surface_[14] = 145; // P surface_[15] = 115; // S surface_[16] = 140; // T surface_[17] = 255; // W surface_[18] = 230; // Y surface_[19] = 155; // V } virtual ~AASurfaceIndex() {} AASurfaceIndex* clone() const { return new AASurfaceIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AASurfaceIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return surface_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return surface_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(surface_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AASURFACEINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp000644 000000 000000 00000007242 12147656566 025406 0ustar00rootroot000000 000000 // // File: GranthamAAChemicalDistance.cpp // Created by: Julien Dutheil // Created on: Mon Feb 21 17:42 2005 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ // from the STL: #include #include "GranthamAAChemicalDistance.h" #include "../Alphabet/AlphabetTools.h" #include using namespace bpp; short int GranthamAAChemicalDistance::SIGN_NONE = 0; short int GranthamAAChemicalDistance::SIGN_ARBITRARY = 1; short int GranthamAAChemicalDistance::SIGN_PC1 = 2; GranthamAAChemicalDistance::GranthamAAChemicalDistance() : distanceMatrix_(20, 20), signMatrix_(20, 20), alpha_(&AlphabetTools::PROTEIN_ALPHABET), sign_(0) { // Load the matrix: #include "__GranthamMatrixCode" } GranthamAAChemicalDistance::~GranthamAAChemicalDistance() {} double GranthamAAChemicalDistance::getIndex(int state1, int state2) const throw (BadIntException) { if (state1 < 0 || state1 > 19) throw BadIntException(state1, "GranthamAAChemicalDistance::getIndex(). Invalid state1.", alpha_); if (state2 < 0 || state2 > 19) throw BadIntException(state2, "GranthamAAChemicalDistance::getIndex(). Invalid state2.", alpha_); double d = distanceMatrix_(state1, state2); if (sign_ == SIGN_NONE) return NumTools::abs(d); if (sign_ == SIGN_PC1) return signMatrix_(state1, state2) * NumTools::abs(d); return d; } double GranthamAAChemicalDistance::getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { return getIndex(alpha_->charToInt(state1), alpha_->charToInt(state2)); } Matrix* GranthamAAChemicalDistance::getIndexMatrix() const { RowMatrix* m = new RowMatrix(distanceMatrix_); if (sign_ == SIGN_NONE) { for (unsigned int i = 0; i < 20; i++) { for (unsigned int j = 0; j < 20; j++) { (*m)(i, j) = NumTools::abs((*m)(i, j)); } } } else if (sign_ == SIGN_PC1) { for (unsigned int i = 0; i < 20; i++) { for (unsigned int j = 0; j < 20; j++) { (*m)(i, j) = signMatrix_(i, j) * NumTools::abs((*m)(i, j)); } } } return m; } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp000644 000000 000000 00000005164 12147656566 021511 0ustar00rootroot000000 000000 // // File: BLOSUM50.cpp // Created by: Julien Dutheil // Created on: Tue Jan 18 10:28 2007 // /* 
Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BLOSUM50.h" #include "../Alphabet/AlphabetTools.h" // from the STL: #include using namespace std; using namespace bpp; BLOSUM50::BLOSUM50() : distanceMatrix_(20, 20), alpha_(&AlphabetTools::PROTEIN_ALPHABET) { #include "__BLOSUM50MatrixCode" } double BLOSUM50::getIndex(int state1, int state2) const throw (BadIntException) { if (state1 < 0 || state1 > 19) throw BadIntException(state1, "BLOSUM50::getIndex(). 
Invalid state1.", alpha_); if (state2 < 0 || state2 > 19) throw BadIntException(state2, "BLOSUM50::getIndex(). Invalid state2.", alpha_); return distanceMatrix_(state1, state2); } double BLOSUM50::getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { return distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2)); } LinearMatrix* BLOSUM50::getIndexMatrix() const { return new LinearMatrix(distanceMatrix_); } bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h000644 000000 000000 00000007106 12147656566 024317 0ustar00rootroot000000 000000 // // File: AAChouFasmanBSheetIndex.h // Created by: Bastien Boussau // Created on: Fri Jan 14 10:31 2011 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _AACHUFASMANBSHEETINDEX_H_ #define _AACHUFASMANBSHEETINDEX_H_ #include "AlphabetIndex1.h" #include "../Alphabet/ProteicAlphabet.h" #include "../Alphabet/AlphabetTools.h" namespace bpp { /** * @brief B-sheet score for the Chou-Fasman algorithm of secondary structure prediction, according to http://prowl.rockefeller.edu/aainfo/chou.htm * * */ class AAChouFasmanBSheetIndex : public AlphabetIndex1 { private: std::vector bSheet_; public: AAChouFasmanBSheetIndex() : bSheet_() { bSheet_.resize(20); bSheet_[ 0] = 83; // A bSheet_[ 1] = 93; // R bSheet_[ 2] = 89; // N bSheet_[ 3] = 54; // D bSheet_[ 4] = 119; // C bSheet_[ 5] = 110; // Q bSheet_[ 6] = 37; // E bSheet_[ 7] = 75; // G bSheet_[ 8] = 87; // H bSheet_[ 9] = 160; // I bSheet_[10] = 130; // L bSheet_[11] = 74; // K bSheet_[12] = 105; // M bSheet_[13] = 138; // F bSheet_[14] = 55; // P bSheet_[15] = 75; // S bSheet_[16] = 119; // T bSheet_[17] = 137; // W bSheet_[18] = 147; // Y bSheet_[19] = 170; // V } virtual ~AAChouFasmanBSheetIndex() {} AAChouFasmanBSheetIndex* clone() const { return new AAChouFasmanBSheetIndex(); } public: double getIndex(int state) const throw (BadIntException) { if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanBSheetIndex::getIndex(). 
Invalid state.", &AlphabetTools::PROTEIN_ALPHABET); return bSheet_[state]; } double getIndex(const std::string& state) const throw (BadCharException) { return bSheet_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)]; } std::vector* getIndexVector() const { return new std::vector(bSheet_); } const Alphabet* getAlphabet() const { return &AlphabetTools::PROTEIN_ALPHABET; } }; } // end of namespace bpp. #endif // _AACHUFASMANBSHEETINDEX_H_ bpp-seq-2.1.0/src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp000644 000000 000000 00000005641 12147656566 022570 0ustar00rootroot000000 000000 // // File: SimpleScore.cpp // Created by: Julien Dutheil // Created on: Fri May 04 09:35 2007 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ // from the STL: #include using namespace std; #include "SimpleScore.h" using namespace bpp; SimpleScore::SimpleScore(const Alphabet* alphabet, double match, double mismatch) : distanceMatrix_(alphabet->getSize(), alphabet->getSize()), alphabet_(alphabet) { // Load the matrix: unsigned int n = alphabet_->getSize(); for (unsigned int i = 0; i < n; i++) { for (unsigned int j = 0; j < n; j++) { distanceMatrix_(i, j) = (i == j ? match : mismatch); } } } double SimpleScore::getIndex(int state1, int state2) const throw (BadIntException) { if (state1 < 0 || state1 > (int)alphabet_->getSize()) throw BadIntException(state1, "SimpleScore::getIndex(). Invalid state1.", alphabet_); if (state2 < 0 || state2 > (int)alphabet_->getSize()) throw BadIntException(state2, "SimpleScore::getIndex(). Invalid state2.", alphabet_); return distanceMatrix_(state1, state2); } double SimpleScore::getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException) { return distanceMatrix_(alphabet_->charToInt(state1), alphabet_->charToInt(state2)); } LinearMatrix* SimpleScore::getIndexMatrix() const { return new LinearMatrix(distanceMatrix_); } bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithAnnotation.h000644 000000 000000 00000036540 12147656566 021741 0ustar00rootroot000000 000000 // // File: SequenceWithAnnotation.h // Created by: Julien Dutheil // Created on: Mon Jul 19 2010 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for sequences analysis. 
This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _SEQUENCEWITHANNOTATION_H_
#define _SEQUENCEWITHANNOTATION_H_

#include "Sequence.h"

// From the STL:
#include
#include

namespace bpp
{
class SequenceWithAnnotation;

/**
 * @brief Interface for sequence annotations.
 *
 * An annotation is also a SymbolListListener (virtual base below).
 */
class SequenceAnnotation :
  public virtual SymbolListListener
{
public:
  /**
   * Duplication hook.
   * NOTE(review): implementations presumably return a newly allocated copy -- confirm.
   */
  virtual SequenceAnnotation* clone() const = 0;

  /**
   * Creates a default annotation according to a given sequence.
   *
   * @param seq The template sequence (typically the one with which the annotation will be attached).
   */
  virtual void init(const Sequence& seq) = 0;

  /**
   * @return The type of the annotation. This is used for querying annotations.
   */
  virtual const std::string& getType() const = 0;

  /**
   * @brief Test if the annotation is valid for a given sequence.
   *
   * @param sequence The sequence to be validated against.
   * @param throwException If set to yes, throw an exception if the sequence is not valid.
   * @return true if this annotation is compliant with the given sequence.
   */
  virtual bool isValidWith(const SequenceWithAnnotation& sequence, bool throwException = true) const = 0;

  /**
   * @brief Merge the input annotation with the current one.
   *
   * @param anno The annotation to fuse.
   * @return true if the fusion was possible and successful.
   */
  virtual bool merge(const SequenceAnnotation& anno) = 0;

  /**
   * @return A new annotation corresponding to a part of the sequence.
   * The implementation of this highly depends on the annotation type, and might not be supported.
   * @param pos Starting point of the region.
   * @param len The length of the region, in number of positions.
   */
  virtual SequenceAnnotation* getPartAnnotation(size_t pos, size_t len) const throw (Exception) = 0;
};

/**
 * @brief An implementation of the Sequence interface that supports annotation.
 *
 * This is a general purpose container, containing an ordered list of states.
 * The states that are allowed to be present in the sequence are defined
 * by an alphabet object, which is passed to the sequence constructor by a pointer.
 *
 * For programming convenience, the states are stored as integers, but the translation toward
 * and from a char description is easily performed with the Alphabet classes.
 *
 * Sequence objects also contain a name attribute and potentially several comment lines.
 *
 * The management of sequence content is identical to the BasicSequence object, but edition events are
 * properly fired. Listeners are therefore properly handled.
 *
 * @see BasicSequence
 */
class SequenceWithAnnotation :
  public Sequence,
  public EdSymbolList
{
private:
  /**
   * @brief The sequence name.
*/ std::string name_; /** * @brief The sequence comments. */ Comments comments_; public: /** * @brief Empty constructor: build a void Sequence with just an Alphabet * * You can use it safely for all type of Alphabet in order to build an * empty Sequence i.e. without name nor sequence data. * * @param alpha A pointer toward the Alphabet to be used with this Sequence. */ SequenceWithAnnotation(const Alphabet* alpha); /** * @brief Direct constructor: build a Sequence object from a std::string * You can use it safely for RNA, DNA and protein sequences. * * It can be used with codon sequences too, the std::string will be cut into * parts of size 3. But for more complicated alphabets, you should use one * complete constructors. * * @param name The sequence name. * @param sequence The whole sequence to be parsed as a std::string. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::string& sequence, const Alphabet* alpha) throw (BadCharException); /** * @brief Direct constructor: build a Sequence object from a std::string. * * You can use it safely for RNA, DNA and protein sequences. * * It can be used with codon sequences too, the std::string will be cut into * tokens of size 3. But for more complicated alphabets, you should use one * complete constructors. * * @param name The sequence name. * @param sequence The whole sequence to be parsed as a std::string. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * You should note that the sequence is stored as a std::vector of int. * Hence each std::string in the std::vector will be translated using the alphabet object. 
* * @param name The sequence name. * @param sequence The sequence content. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * You should note that the sequence is stored as a std::vector of int. * Hence each std::string in the std::vector will be translated using the alphabet object. * * @param name The sequence name. * @param sequence The sequence content. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadCharException); /** * @brief General purpose constructor, can be used with any alphabet. * * @param name The sequence name. * @param sequence The sequence content. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Alphabet* alpha) throw (BadIntException); /** * @brief General purpose constructor, can be used with any alphabet. * * @param name The sequence name. * @param sequence The sequence content. * @param comments Comments to add to the sequence. * @param alpha A pointer toward the alphabet to be used with this sequence. */ SequenceWithAnnotation(const std::string& name, const std::vector& sequence, const Comments& comments, const Alphabet* alpha) throw (BadIntException); /** * @brief The Sequence generic copy constructor. This does not perform a hard copy of the alphabet object. */ SequenceWithAnnotation(const Sequence& s); /** * @brief The Sequence copy constructor. This does not perform a hard copy of the alphabet object. 
*/ SequenceWithAnnotation(const SequenceWithAnnotation& s); /** * @brief The Sequence generic assignment operator. This does not perform a hard copy of the alphabet object. * * @return A ref toward the assigned Sequence. */ SequenceWithAnnotation& operator=(const Sequence& s); /** * @brief The Sequence assignment operator. This does not perform a hard copy of the alphabet object. * * @return A ref toward the assigned Sequence. */ SequenceWithAnnotation& operator=(const SequenceWithAnnotation& s); virtual ~SequenceWithAnnotation() {} public: /** * @name The Clonable interface * * @{ */ SequenceWithAnnotation* clone() const { return new SequenceWithAnnotation(*this); } /** @} */ /** * @name Setting/getting the name of the sequence. * * @{ */ /** * @brief Get the name of this sequence. * * @return This sequence name. */ const std::string& getName() const { return name_; } /** * @brief Set the name of this sequence. * * @param name The new name of the sequence. */ void setName(const std::string& name) { name_ = name; } /** @} */ /** * @name Setting/getting the comments associated to the sequence. * * @{ */ /** * @brief Get the comments associated to this sequence. * * @return The comments of the sequence. */ const Comments& getComments() const { return comments_; } /** * @brief Set the comments associated to this sequence. * * @param comments The new comments of the sequence. */ void setComments(const Comments& comments) { comments_ = comments; } /** @} */ /** * @name Adjusting the size of the sequence. * * @{ */ /** * @brief Set the whole content of the sequence. * * @param sequence The new content of the sequence. * @see The Sequence constructor for information about the way sequences are internaly stored. 
*/ virtual void setContent(const std::string& sequence) throw (BadCharException); void setContent(const std::vector& list) throw (BadIntException) { EdSymbolList::setContent(list); } void setContent(const std::vector& list) throw (BadCharException) { EdSymbolList::setContent(list); } /** * @brief Set up the size of a sequence from the right side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeR(size_t newSize); /** * @brief Set up the size of a sequence from the left side. * * All new characters are filled with gaps. * If the specified size is < to the sequence size, the sequence will be truncated. * * @param newSize The new size of the sequence. */ virtual void setToSizeL(size_t newSize); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadIntException If the content does not match the current alphabet. */ void append(const std::vector& content) throw (BadIntException); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ void append(const std::vector& content) throw (BadCharException); /** * @brief Append the specified content to the sequence. * * @param content The content to append to the sequence. * @throw BadCharException If the content does not match the current alphabet. */ void append(const std::string& content) throw (BadCharException); /** @} */ /** * @brief Add a new annotation to the sequence. * * @param anno The annotation object to be added. Unless the annotation is shared, * the annotation object will be owned by the sequence object, and will be copied * and deleted when needed. * @throw Exception If the annotation is not valid for this sequence. 
* @see SequenceWithAnnotation::isValidWith */ virtual void addAnnotation(SequenceAnnotation* anno) throw (Exception) { anno->isValidWith(*this); addSymbolListListener(anno); } virtual bool hasAnnotation(const std::string& type) const { for (size_t i = 0; i < getNumberOfListeners(); ++i) { const SymbolListListener* listener = &getListener(i); const SequenceAnnotation* anno = dynamic_cast(listener); if (anno && anno->getType() == type) return true; } return false; } virtual const SequenceAnnotation& getAnnotation(const std::string& type) const { for (size_t i = 0; i < getNumberOfListeners(); ++i) { const SymbolListListener* listener = &getListener(i); const SequenceAnnotation* anno = dynamic_cast(listener); if (anno && anno->getType() == type) return *anno; } throw Exception("SequenceWithAnnotation::getAnnotation. No annotation found with type '" + type + "'."); } virtual SequenceAnnotation& getAnnotation(const std::string& type) { for (size_t i = 0; i < getNumberOfListeners(); ++i) { SymbolListListener* listener = &getListener(i); SequenceAnnotation* anno = dynamic_cast(listener); if (anno && anno->getType() == type) return *anno; } throw Exception("SequenceWithAnnotation::getAnnotation. No annotation found with type '" + type + "'."); } /** * @return The list of annotation types contained in this sequence. */ virtual std::vector getAnnotationTypes() const; /** * @brief Merge a sequence with the current one. * * Sequences must have the same name and alphabets. * Only first sequence's commentaries are kept. * Annotations that could not be merged will not be added in the concatenated sequence. * See the documentation of each annotation class for more details. * * @param swa The sequence to merge with. * @throw AlphabetMismatchException If the two alphabets do not match. * @throw Exception If the sequence names do not match. */ virtual void merge(const SequenceWithAnnotation& swa) throw (AlphabetMismatchException, Exception); }; } //end of namespace bpp. 
#endif // _SEQUENCEWITHANNOTATION_H_ bpp-seq-2.1.0/src/Bpp/Seq/DistanceMatrix.cpp000644 000000 000000 00000003721 12147656566 020547 0ustar00rootroot000000 000000 // // File: DistanceMatrix.cpp // Created on: Sat Feb 17 15:25 2007 // /* Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) This software is a computer program whose purpose is to provide classes for phylogenetic data analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DistanceMatrix.h" using namespace std; using namespace bpp; size_t DistanceMatrix::getNameIndex(const std::string& name) const throw (Exception) { for (size_t i = 0; i < names_.size(); ++i) { if (names_[i] == name) return i; } throw Exception("DistanceMatrix::getNameIndex. Name not found: '" + name + "'."); } bpp-seq-2.1.0/src/Bpp/Seq/SequenceWithAnnotationTools.cpp000644 000000 000000 00000006702 12147656566 023312 0ustar00rootroot000000 000000 // // File: SequenceWithAnnotationTools.cpp // Authors: Julien Dutheil // Created on: 06 Sep 2010 // /* Copyright or © or Copr. Bio++ Development Team, (Sep 06, 2010) This software is a computer program whose purpose is to provide classes for sequences analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "SequenceWithAnnotationTools.h" #include "Alphabet/CaseMaskedAlphabet.h" using namespace bpp; using namespace std; const string SequenceMask::MASK = "Boolean mask"; /******************************************************************************/ void SequenceMask::afterSequenceChanged(const SymbolListEditionEvent& event) { mask_.clear(); mask_.insert(mask_.begin(), event.getSymbolList()->size(), false); } /******************************************************************************/ void SequenceMask::afterSequenceInserted(const SymbolListInsertionEvent& event) { mask_.insert(mask_.begin() + event.getPosition(), event.getLength(), false); } /******************************************************************************/ void SequenceMask::afterSequenceDeleted(const SymbolListDeletionEvent& event) { mask_.erase(mask_.begin() + event.getPosition(), mask_.begin() + event.getPosition() + event.getLength()); } /******************************************************************************/ SequenceWithAnnotation* SequenceWithAnnotationTools::createMaskAnnotation(const Sequence& seq) throw (AlphabetException) { const CaseMaskedAlphabet* cma = dynamic_cast(seq.getAlphabet()); if (cma) { SequenceWithAnnotation* seqa = new SequenceWithAnnotation(seq.getName(), seq.toString(), seq.getComments(), seq.getAlphabet()); vector mask(seq.size()); for (unsigned int i = 0; i < seq.size(); ++i) { mask[i] = cma->isMasked(seq[i]); } seqa->addAnnotation(new SequenceMask(mask)); return seqa; } else { throw AlphabetException("SequenceWithAnnotationTools::createMaskAnnotation. Alphabet should be a CaseMaskedAlphabet.", seq.getAlphabet()); } } /******************************************************************************/