bpp-popgen-2.1.0/CMakeLists.txt000644 000000 000000 00000014361 12147656633 016321 0ustar00rootroot000000 000000 
# CMake script for Bio++ PopGenLib
# Author: Sylvain Gaillard and Julien Dutheil
# Created: 21/08/2009

# Global parameters
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
PROJECT(bpp-popgen CXX)

# Default to an optimized build with debug symbols when no build type is given.
IF(NOT CMAKE_BUILD_TYPE)
  SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
      "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
      FORCE)
ENDIF(NOT CMAKE_BUILD_TYPE)

# Add the project warnings to whatever flags were already provided (cache,
# command line or environment) instead of discarding them.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weffc++ -Wshadow -Wconversion")

# NO_VIRTUAL_COV: user switch, published in the cache with a FALSE default.
IF(NOT NO_VIRTUAL_COV)
  SET(NO_VIRTUAL_COV FALSE CACHE BOOL
      "Disable covariant return type with virtual inheritance, for compilers that do not support it."
      FORCE)
ENDIF(NOT NO_VIRTUAL_COV)

IF(NO_VIRTUAL_COV)
  MESSAGE("-- Covariant return with virtual inheritance disabled.")
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_VIRTUAL_COV=1")
ENDIF(NO_VIRTUAL_COV)

# NO_DEP_CHECK: user switch, published in the cache with a FALSE default.
# When set, the dependency lookup, the src subdirectory and the Doxygen
# targets below are all skipped.
IF(NOT NO_DEP_CHECK)
  SET(NO_DEP_CHECK FALSE CACHE BOOL
      "Disable dependencies check for building distribution only."
      FORCE)
ENDIF(NOT NO_DEP_CHECK)

IF(NO_DEP_CHECK)
  MESSAGE("-- Dependencies checking disabled. Only distribution can be built.")
ELSE(NO_DEP_CHECK)

  # Libtool-like version number
  # CURRENT:REVISION:AGE => file.so.(C-A).A.R
  # current:  The most recent interface number that this library implements.
  # revision: The implementation number of the current interface.
  # age:      The difference between the newest and oldest interfaces that this
  #           library implements.
  # In other words, the library implements all the interface numbers in the
  # range from number current - age to current.
  SET(BPPPOPGEN_VERSION_CURRENT "6")
  SET(BPPPOPGEN_VERSION_REVISION "3")
  SET(BPPPOPGEN_VERSION_AGE "0")

  # Effective version number computation
  MATH(EXPR BPPPOPGEN_VERSION_MAJOR "${BPPPOPGEN_VERSION_CURRENT} - ${BPPPOPGEN_VERSION_AGE}")
  SET(BPPPOPGEN_VERSION_MINOR ${BPPPOPGEN_VERSION_AGE})
  SET(BPPPOPGEN_VERSION_PATCH ${BPPPOPGEN_VERSION_REVISION})
  SET(BPPPOPGEN_VERSION "${BPPPOPGEN_VERSION_MAJOR}.${BPPPOPGEN_VERSION_MINOR}.${BPPPOPGEN_VERSION_PATCH}")

  # Set the CMAKE_PREFIX_PATH for the find_library function when using non
  # standard install location
  IF(CMAKE_INSTALL_PREFIX)
    SET(CMAKE_PREFIX_PATH "${CMAKE_INSTALL_PREFIX}" ${CMAKE_PREFIX_PATH})
  ENDIF(CMAKE_INSTALL_PREFIX)

  # IMPROVED_FIND_LIBRARY(OUTPUT_LIBS lib_name include_to_find)
  #
  # Locates a required library together with its headers.
  #   OUTPUT_LIBS     - name of the list variable the library path is appended to
  #   lib_name        - base name of the library; <lib_name>lib and
  #                     <lib_name>dll are tried as alternative names
  #   include_to_find - header (relative path) used to locate the include dir
  #
  # On success the include directory is added with INCLUDE_DIRECTORIES and the
  # library path is appended to OUTPUT_LIBS. Configuration stops with a fatal
  # error when either the library or the header cannot be found.
  #
  # Example: IMPROVED_FIND_LIBRARY(LIBS bpp-core Bpp/Clonable.h)
  MACRO(IMPROVED_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find)
    #start:
    FIND_PATH(${lib_name}_INCLUDE_DIR ${include_to_find})
    SET(${lib_name}_NAMES ${lib_name} ${lib_name}lib ${lib_name}dll)
    FIND_LIBRARY(${lib_name}_LIBRARY NAMES ${${lib_name}_NAMES} PATH_SUFFIXES lib${LIB_SUFFIX})

    IF(NOT ${lib_name}_LIBRARY)
      MESSAGE(FATAL_ERROR "${lib_name} required but not found.")
    ENDIF(NOT ${lib_name}_LIBRARY)
    # Fail here with a clear message rather than handing a -NOTFOUND path to
    # INCLUDE_DIRECTORIES below.
    IF(NOT ${lib_name}_INCLUDE_DIR)
      MESSAGE(FATAL_ERROR "${lib_name} headers (${include_to_find}) required but not found.")
    ENDIF(NOT ${lib_name}_INCLUDE_DIR)

    MESSAGE("-- Library ${lib_name} found here:")
    MESSAGE("   includes : ${${lib_name}_INCLUDE_DIR}")
    MESSAGE("   libraries: ${${lib_name}_LIBRARY}")

    #add the dependency:
    INCLUDE_DIRECTORIES(${${lib_name}_INCLUDE_DIR})
    SET(${OUTPUT_LIBS} ${${OUTPUT_LIBS}} ${${lib_name}_LIBRARY})
  ENDMACRO(IMPROVED_FIND_LIBRARY)

  #Find the Bio++ libraries:
  IMPROVED_FIND_LIBRARY(LIBS bpp-seq Bpp/Seq/Alphabet/Alphabet.h)
  IMPROVED_FIND_LIBRARY(LIBS bpp-core Bpp/Clonable.h)

  # Subdirectories
  ADD_SUBDIRECTORY(src)

  # Doxygen
  # Both targets copy the Doxyfile into the build tree, append overrides to
  # the copy and run doxygen on it from the source directory.
  FIND_PACKAGE(Doxygen)
  IF (DOXYGEN_FOUND)
    ADD_CUSTOM_TARGET (apidoc
      cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-build
      COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-build
      COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-build
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
    ADD_CUSTOM_TARGET (apidoc-stable
      cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND echo "HTML_HEADER=header.html" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
      COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-stable
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
  ENDIF (DOXYGEN_FOUND)

ENDIF(NO_DEP_CHECK)

# Packager
SET(CPACK_PACKAGE_NAME "libbpp-popgen")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
SET(CPACK_PACKAGE_VERSION "2.1.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "2")
SET(CPACK_PACKAGE_VERSION_MINOR "1")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Population Genetics library")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
SET(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS.txt")
SET(CPACK_RESOURCE_FILE_INSTALL "${CMAKE_SOURCE_DIR}/INSTALL.txt")
SET(CPACK_SOURCE_GENERATOR "TGZ")
# Patterns of build products and editor/packaging leftovers that are kept out
# of the source package.
SET(CPACK_SOURCE_IGNORE_FILES
 "CMakeFiles"
 "Makefile"
 "_CPack_Packages"
 "CMakeCache.txt"
 ".*\\\\.cmake"
 ".*\\\\.git"
 ".*\\\\.gz"
 ".*\\\\.deb"
 ".*\\\\.rpm"
 ".*\\\\.dmg"
 ".*\\\\.sh"
 ".*\\\\..*\\\\.swp"
 "src/\\\\..*"
 "src/libbpp*"
 "debian/tmp"
 "debian/libbpp.*/"
 "debian/libbpp.*\\\\.so.*"
 "debian/libbpp.*\\\\.a"
 "debian/libbpp.*\\\\.substvars"
 "debian/libbpp.*\\\\.debhelper"
 "debian/debhelper\\\\.log"
 "html"
 "PopGen.tag"
 "Testing"
 "build-stamp"
 "install_manifest.txt"
 "DartConfiguration.tcl"
 ${CPACK_SOURCE_IGNORE_FILES}
)
# NOTE(review): MACOS is not set anywhere in this file; CMake's own platform
# variable is APPLE. Confirm MACOS is defined by the caller, otherwise this
# branch never runs.
IF (MACOS)
  SET(CPACK_GENERATOR "Bundle")
ENDIF()

SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
SET(CPACK_DEBSOURCE_PACKAGE_FILE_NAME "lib${CMAKE_PROJECT_NAME}_${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.orig")
INCLUDE(CPack)

#This adds the 'dist' target
ADD_CUSTOM_TARGET(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
# 'clean' is not (yet) a first class target. However, we need to clean the
# directories before building the sources:
IF("${CMAKE_GENERATOR}" MATCHES "Make")
  # Run 'clean' in the build directory of this CMakeLists.txt, where the
  # generated Makefile lives.
  ADD_CUSTOM_TARGET(make_clean
    COMMAND ${CMAKE_MAKE_PROGRAM} clean
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  )
  ADD_DEPENDENCIES(dist make_clean)
ENDIF()

IF(NOT NO_DEP_CHECK)
  IF (UNIX)

    #This creates deb packages:
    ADD_CUSTOM_TARGET(origdist COMMAND cp ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz ../${CPACK_DEBSOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(origdist dist)
    ADD_CUSTOM_TARGET(deb dpkg-buildpackage -uc -us -i${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(deb origdist)

    #This creates rpm packages:
    ADD_CUSTOM_TARGET(rpm rpmbuild -ta ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
    ADD_DEPENDENCIES(rpm dist)

  ENDIF()
ENDIF(NOT NO_DEP_CHECK)
bpp-popgen-2.1.0/bpp-popgen.spec000644 000000 000000 00000012566 12147656633 016511 0ustar00rootroot000000 000000 
%define _basename bpp-popgen
%define _version 2.1.0
%define _release 1
%define _prefix /usr

URL: http://biopp.univ-montp2.fr/
Name: %{_basename}
Version: %{_version}
Release: %{_release}
License: CECILL-2.0
Vendor: The Bio++ Project
Source: http://biopp.univ-montp2.fr/repos/sources/%{_basename}-%{_version}.tar.gz
Summary: Bio++ Population Genetics library
Group: Development/Libraries/C and C++
Requires: bpp-core = %{_version}
Requires: bpp-seq = %{_version}

BuildRoot: %{_builddir}/%{_basename}-root
BuildRequires: cmake >= 2.6.0
BuildRequires: gcc-c++ >= 4.0.0
BuildRequires: libbpp-core2 = %{_version}
BuildRequires: libbpp-core-devel = %{_version}
BuildRequires: libbpp-seq9 = %{_version}
BuildRequires: libbpp-seq-devel = %{_version}

AutoReq: yes
AutoProv: yes

%description
This library contains utilitary and classes for population genetics analysis.
It is part of the Bio++ project.
%package -n libbpp-popgen6
Summary: Bio++ Population Genetics library
Group: Development/Libraries/C and C++

%description -n libbpp-popgen6
This library contains utilitary and classes for population genetics and molecular evolution analysis.
It is part of the Bio++ project.

%package -n libbpp-popgen-devel
Summary: Libraries, includes to develop applications with %{_basename}
Group: Development/Libraries/C and C++
Requires: libbpp-popgen6 = %{_version}
Requires: libbpp-seq9 = %{_version}
Requires: libbpp-seq-devel = %{_version}
Requires: libbpp-core2 = %{_version}
Requires: libbpp-core-devel = %{_version}

%description -n libbpp-popgen-devel
The libbpp-popgen-devel package contains the header files and static libraries for
building applications which use %{_basename}.

%prep
%setup -q

%build
CFLAGS="$RPM_OPT_FLAGS"
CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=%{_prefix} -DBUILD_TESTING=OFF"
# Pass the 64 suffix on to CMake when the distribution's library directory is lib64.
if [ %{_lib} = 'lib64' ] ; then
  CMAKE_FLAGS="$CMAKE_FLAGS -DLIB_SUFFIX=64"
fi
cmake $CMAKE_FLAGS .
make

%install
make DESTDIR=$RPM_BUILD_ROOT install

%clean
rm -rf $RPM_BUILD_ROOT

%post -n libbpp-popgen6 -p /sbin/ldconfig

%post -n libbpp-popgen-devel
# createGeneric DIR
# Writes DIR.all, a header that includes every .h and .all file found directly
# in DIR, after doing the same for each subdirectory of DIR.
createGeneric() {
  echo "-- Creating generic include file: $1.all"
  # Make sure we run into subdirectories first, so that their .all files
  # exist before this directory is listed:
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      createGeneric "$file"
    fi
  done
  # Now list all files, including newly created .all files:
  if [ -f "$1.all" ]
  then
    rm "$1.all"
  fi
  dir=`basename "$1"`
  for file in "$1"/*
  do
    if [ -f "$file" ] && ( [ "${file##*.}" = "h" ] || [ "${file##*.}" = "all" ] )
    then
      file=`basename "$file"`
      echo "#include \"$dir/$file\"" >> "$1.all"
    fi
  done;
}
# Actualize .all files
createGeneric %{_prefix}/include/Bpp
exit 0

%preun -n libbpp-popgen-devel
# removeGeneric DIR
# Deletes DIR.all if present, then does the same for each subdirectory of DIR.
removeGeneric() {
  if [ -f "$1.all" ]
  then
    echo "-- Remove generic include file: $1.all"
    rm "$1.all"
  fi
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      removeGeneric "$file"
    fi
  done
}
# Actualize .all files
removeGeneric %{_prefix}/include/Bpp
exit 0

%postun -n libbpp-popgen6 -p /sbin/ldconfig

%postun -n libbpp-popgen-devel
# createGeneric DIR
# Writes DIR.all, a header that includes every .h and .all file found directly
# in DIR, after doing the same for each subdirectory of DIR.
createGeneric() {
  echo "-- Creating generic include file: $1.all"
  # Make sure we run into subdirectories first, so that their .all files
  # exist before this directory is listed:
  for file in "$1"/*
  do
    if [ -d "$file" ]
    then
      # Recursion:
      createGeneric "$file"
    fi
  done
  # Now list all files, including newly created .all files:
  if [ -f "$1.all" ]
  then
    rm "$1.all"
  fi
  dir=`basename "$1"`
  for file in "$1"/*
  do
    if [ -f "$file" ] && ( [ "${file##*.}" = "h" ] || [ "${file##*.}" = "all" ] )
    then
      file=`basename "$file"`
      echo "#include \"$dir/$file\"" >> "$1.all"
    fi
  done;
}
# Actualize .all files: the pre-uninstall step removed every .all file under
# the include directory, so rebuild them for the headers that remain.
if [ -d %{_prefix}/include/Bpp ]
then
  createGeneric %{_prefix}/include/Bpp
fi
exit 0

%files -n libbpp-popgen6
%defattr(-,root,root)
%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
%{_prefix}/%{_lib}/lib*.so.*

%files -n libbpp-popgen-devel
%defattr(-,root,root)
%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
%{_prefix}/%{_lib}/lib*.so
%{_prefix}/%{_lib}/lib*.a
%{_prefix}/include/*

%changelog
* Thu Mar 07 2013 Julien Dutheil 2.1.0-1
- Bug fixed and warnings removed.
* Thu Feb 09 2012 Julien Dutheil 2.0.3-1
- Recompilation for dependencies.
* Thu Jun 09 2011 Julien Dutheil 2.0.2-1
- New Fst calculations + bugs fixed.
* Mon Feb 28 2011 Julien Dutheil 2.0.1-1
* Mon Feb 07 2011 Julien Dutheil 2.0.0-1
* Thu Mar 25 2010 Julien Dutheil 1.5.0-1
* Wed Jun 10 2009 Julien Dutheil 1.4.0-1
* Thu Dec 11 2008 Julien Dutheil 1.3.1-1
* Mon Jul 21 2008 Julien Dutheil 1.3.0-1
* Fri Jan 18 2008 Julien Dutheil 1.2.0-1
* Fri Jul 06 2007 Julien Dutheil 1.1.1-1
- For compatibility. No more dependency for Bpp-Phyl.
* Fri Jan 19 2007 Julien Dutheil 1.1.0-2
- Build 2 for compatibility.
* Mon Aug 28 2006 Julien Dutheil 1.1.0-1
- Now requires Bpp-Phyl too!
* Tue Apr 18 2006 Julien Dutheil 1.0.0-2
- Build 2 for compatibility with other libs. Added STL dependency.
* Fri Nov 16 2005 Julien Dutheil 1.0.0-1
- First draft of the spec file.
bpp-popgen-2.1.0/COPYING.txt000644 000000 000000 00000051140 12147656633 015426 0ustar00rootroot000000 000000 CeCILL FREE SOFTWARE LICENSE AGREEMENT Notice This Agreement is a Free Software license agreement that is the result of discussions between its authors in order to ensure compliance with the two main principles guiding its drafting: * firstly, compliance with the principles governing the distribution of Free Software: access to source code, broad rights granted to users, * secondly, the election of a governing law, French law, with which it is conformant, both as regards the law of torts and intellectual property law, and the protection that it offers to both authors and holders of the economic rights over software. The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[logiciel] L[ibre]) license are: Commissariat à l'Energie Atomique - CEA, a public scientific, technical and industrial establishment, having its principal place of business at 31-33 rue de la Fédération, 75752 Paris cedex 15, France. Centre National de la Recherche Scientifique - CNRS, a public scientific and technological establishment, having its principal place of business at 3 rue Michel-Ange 75794 Paris cedex 16, France. Institut National de Recherche en Informatique et en Automatique - INRIA, a public scientific and technological establishment, having its principal place of business at Domaine de Voluceau, Rocquencourt, BP 105, 78153 Le Chesnay cedex, France. Preamble The purpose of this Free Software license agreement is to grant users the right to modify and redistribute the software governed by this license within the framework of an open source distribution model. The exercising of these rights is conditional upon certain obligations for users so as to preserve this status for all subsequent redistributions. 
In consideration of access to the source code and the rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors only have limited liability. In this respect, the risks associated with loading, using, modifying and/or developing or reproducing the software by the user are brought to the user's attention, given its Free Software status, which may make it complicated to use, with the result that its use is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the Software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions of security. This Agreement may be freely reproduced and published, provided it is not altered, and that no provisions are either added or removed herefrom. This Agreement may apply to any or all software for which the holder of the economic rights decides to submit the use thereof to its provisions. Article 1 - DEFINITIONS For the purpose of this Agreement, when the following expressions commence with a capital letter, they shall have the following meaning: Agreement: means this license agreement, and its possible subsequent versions and annexes. Software: means the software in its Object Code and/or Source Code form and, where applicable, its documentation, "as is" when the Licensee accepts the Agreement. Initial Software: means the Software in its Source Code and possibly its Object Code form and, where applicable, its documentation, "as is" when it is first distributed under the terms and conditions of the Agreement. Modified Software: means the Software modified by at least one Contribution. 
Source Code: means all the Software's instructions and program lines to which access is required so as to modify the Software. Object Code: means the binary files originating from the compilation of the Source Code. Holder: means the holder(s) of the economic rights over the Initial Software. Licensee: means the Software user(s) having accepted the Agreement. Contributor: means a Licensee having made at least one Contribution. Licensor: means the Holder, or any other individual or legal entity, who distributes the Software under the Agreement. Contribution: means any or all modifications, corrections, translations, adaptations and/or new functions integrated into the Software by any or all Contributors, as well as any or all Internal Modules. Module: means a set of sources files including their documentation that enables supplementary functions or services in addition to those offered by the Software. External Module: means any or all Modules, not derived from the Software, so that this Module and the Software run in separate address spaces, with one calling the other when they are run. Internal Module: means any or all Module, connected to the Software so that they both execute in the same address space. GNU GPL: means the GNU General Public License version 2 or any subsequent version, as published by the Free Software Foundation Inc. Parties: mean both the Licensee and the Licensor. These expressions may be used both in singular and plural form. Article 2 - PURPOSE The purpose of the Agreement is the grant by the Licensor to the Licensee of a non-exclusive, transferable and worldwide license for the Software as set forth in Article 5 hereinafter for the whole term of the protection granted by the rights over said Software. 
Article 3 - ACCEPTANCE 3.1 The Licensee shall be deemed as having accepted the terms and conditions of this Agreement upon the occurrence of the first of the following events: * (i) loading the Software by any or all means, notably, by downloading from a remote server, or by loading from a physical medium; * (ii) the first time the Licensee exercises any of the rights granted hereunder. 3.2 One copy of the Agreement, containing a notice relating to the characteristics of the Software, to the limited warranty, and to the fact that its use is restricted to experienced users has been provided to the Licensee prior to its acceptance as set forth in Article 3.1 hereinabove, and the Licensee hereby acknowledges that it has read and understood it. Article 4 - EFFECTIVE DATE AND TERM 4.1 EFFECTIVE DATE The Agreement shall become effective on the date when it is accepted by the Licensee as set forth in Article 3.1. 4.2 TERM The Agreement shall remain in force for the entire legal term of protection of the economic rights over the Software. Article 5 - SCOPE OF RIGHTS GRANTED The Licensor hereby grants to the Licensee, who accepts, the following rights over the Software for any or all use, and for the term of the Agreement, on the basis of the terms and conditions set forth hereinafter. Besides, if the Licensor owns or comes to own one or more patents protecting all or part of the functions of the Software or of its components, the Licensor undertakes not to enforce the rights granted by these patents against successive Licensees using, exploiting or modifying the Software. If these patents are transferred, the Licensor undertakes to have the transferees subscribe to the obligations set forth in this paragraph. 5.1 RIGHT OF USE The Licensee is authorized to use the Software, without any limitation as to its fields of application, with it being hereinafter specified that this comprises: 1. 
permanent or temporary reproduction of all or part of the Software by any or all means and in any or all form. 2. loading, displaying, running, or storing the Software on any or all medium. 3. entitlement to observe, study or test its operation so as to determine the ideas and principles behind any or all constituent elements of said Software. This shall apply when the Licensee carries out any or all loading, displaying, running, transmission or storage operation as regards the Software, that it is entitled to carry out hereunder. 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS The right to make Contributions includes the right to translate, adapt, arrange, or make any or all modifications to the Software, and the right to reproduce the resulting Software. The Licensee is authorized to make any or all Contributions to the Software provided that it includes an explicit notice that it is the author of said Contribution and indicates the date of the creation thereof. 5.3 RIGHT OF DISTRIBUTION In particular, the right of distribution includes the right to publish, transmit and communicate the Software to the general public on any or all medium, and by any or all means, and the right to market, either in consideration of a fee, or free of charge, one or more copies of the Software by any means. The Licensee is further authorized to distribute copies of the modified or unmodified Software to third parties according to the terms and conditions set forth hereinafter. 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION The Licensee is authorized to distribute true copies of the Software in Source Code or Object Code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by: 1. a copy of the Agreement, 2. 
a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Articles 8 and 9, and that, in the event that only the Object Code of the Software is redistributed, the Licensee allows future Licensees unhindered access to the full Source Code of the Software by indicating how to access it, it being understood that the additional cost of acquiring the Source Code shall not exceed the cost of transferring the data. 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE When the Licensee makes a Contribution to the Software, the terms and conditions for the distribution of the Modified Software become subject to all the provisions of this Agreement. The Licensee is authorized to distribute the Modified Software, in Source Code or Object Code form, provided that said distribution complies with all the provisions of the Agreement and is accompanied by: 1. a copy of the Agreement, 2. a notice relating to the limitation of both the Licensor's warranty and liability as set forth in Articles 8 and 9, and that, in the event that only the Object Code of the Modified Software is redistributed, the Licensee allows future Licensees unhindered access to the full Source Code of the Modified Software by indicating how to access it, it being understood that the additional cost of acquiring the Source Code shall not exceed the cost of transferring the data. 5.3.3 DISTRIBUTION OF EXTERNAL MODULES When the Licensee has developed an External Module, the terms and conditions of this Agreement do not apply to said External Module, that may be distributed under a separate license agreement. 5.3.4 COMPATIBILITY WITH THE GNU GPL The Licensee can include a code that is subject to the provisions of one of the versions of the GNU GPL in the Modified or unmodified Software, and distribute that entire code under the terms of the same version of the GNU GPL. 
The Licensee can include the Modified or unmodified Software in a code that is subject to the provisions of one of the versions of the GNU GPL, and distribute that entire code under the terms of the same version of the GNU GPL. Article 6 - INTELLECTUAL PROPERTY 6.1 OVER THE INITIAL SOFTWARE The Holder owns the economic rights over the Initial Software. Any or all use of the Initial Software is subject to compliance with the terms and conditions under which the Holder has elected to distribute its work and no one shall be entitled to modify the terms and conditions for the distribution of said Initial Software. The Holder undertakes that the Initial Software will remain ruled at least by the current license, for the duration set forth in article 4.2. 6.2 OVER THE CONTRIBUTIONS A Licensee who develops a Contribution is the owner of the intellectual property rights over this Contribution as defined by applicable law. 6.3 OVER THE EXTERNAL MODULES A Licensee who develops an External Module is the owner of the intellectual property rights over this External Module as defined by applicable law and is free to choose the type of agreement that shall govern its distribution. 6.4 JOINT PROVISIONS The Licensee expressly undertakes: 1. not to remove, or modify, in any manner, the intellectual property notices attached to the Software; 2. to reproduce said notices, in an identical manner, in the copies of the Software modified or not. The Licensee undertakes not to directly or indirectly infringe the intellectual property rights of the Holder and/or Contributors on the Software and to take, where applicable, vis-à-vis its staff, any and all measures required to ensure respect of said intellectual property rights of the Holder and/or Contributors. Article 7 - RELATED SERVICES 7.1 Under no circumstances shall the Agreement oblige the Licensor to provide technical assistance or maintenance services for the Software. However, the Licensor is entitled to offer this type of services. 
The terms and conditions of such technical assistance, and/or such maintenance, shall be set forth in a separate instrument. Only the Licensor offering said maintenance and/or technical assistance services shall incur liability therefor. 7.2 Similarly, any Licensor is entitled to offer to its licensees, under its sole responsibility, a warranty, that shall only be binding upon itself, for the redistribution of the Software and/or the Modified Software, under terms and conditions that it is free to decide. Said warranty, and the financial terms and conditions of its application, shall be subject of a separate instrument executed between the Licensor and the Licensee. Article 8 - LIABILITY 8.1 Subject to the provisions of Article 8.2, the Licensee shall be entitled to claim compensation for any direct loss it may have suffered from the Software as a result of a fault on the part of the relevant Licensor, subject to providing evidence thereof. 8.2 The Licensor's liability is limited to the commitments made under this Agreement and shall not be incurred as a result of in particular: (i) loss due the Licensee's total or partial failure to fulfill its obligations, (ii) direct or consequential loss that is suffered by the Licensee due to the use or performance of the Software, and (iii) more generally, any consequential loss. In particular the Parties expressly agree that any or all pecuniary or business loss (i.e. loss of data, loss of profits, operating loss, loss of customers or orders, opportunity cost, any disturbance to business activities) or any or all legal proceedings instituted against the Licensee by a third party, shall constitute consequential loss and shall not provide entitlement to any or all compensation from the Licensor. 
Article 9 - WARRANTY 9.1 The Licensee acknowledges that the scientific and technical state-of-the-art when the Software was distributed did not enable all possible uses to be tested and verified, nor for the presence of possible defects to be detected. In this respect, the Licensee's attention has been drawn to the risks associated with loading, using, modifying and/or developing and reproducing the Software which are reserved for experienced users. The Licensee shall be responsible for verifying, by any or all means, the product's suitability for its requirements, its good working order, and for ensuring that it shall not cause damage to either persons or properties. 9.2 The Licensor hereby represents, in good faith, that it is entitled to grant all the rights over the Software (including in particular the rights set forth in Article 5). 9.3 The Licensee acknowledges that the Software is supplied "as is" by the Licensor without any other express or tacit warranty, other than that provided for in Article 9.2 and, in particular, without any warranty as to its commercial value, its secured, safe, innovative or relevant nature. Specifically, the Licensor does not warrant that the Software is free from any error, that it will operate without interruption, that it will be compatible with the Licensee's own equipment and software configuration, nor that it will meet the Licensee's requirements. 9.4 The Licensor does not either expressly or tacitly warrant that the Software does not infringe any third party intellectual property right relating to a patent, software or any other property right. Therefore, the Licensor disclaims any and all liability towards the Licensee arising out of any or all proceedings for infringement that may be instituted in respect of the use, modification and redistribution of the Software. Nevertheless, should such proceedings be instituted against the Licensee, the Licensor shall provide it with technical and legal assistance for its defense. 
Such technical and legal assistance shall be decided on a case-by-case basis between the relevant Licensor and the Licensee pursuant to a memorandum of understanding. The Licensor disclaims any and all liability as regards the Licensee's use of the name of the Software. No warranty is given as regards the existence of prior rights over the name of the Software or as regards the existence of a trademark. Article 10 - TERMINATION 10.1 In the event of a breach by the Licensee of its obligations hereunder, the Licensor may automatically terminate this Agreement thirty (30) days after notice has been sent to the Licensee and has remained ineffective. 10.2 A Licensee whose Agreement is terminated shall no longer be authorized to use, modify or distribute the Software. However, any licenses that it may have granted prior to termination of the Agreement shall remain valid subject to their having been granted in compliance with the terms and conditions hereof. Article 11 - MISCELLANEOUS 11.1 EXCUSABLE EVENTS Neither Party shall be liable for any or all delay, or failure to perform the Agreement, that may be attributable to an event of force majeure, an act of God or an outside cause, such as defective functioning or interruptions of the electricity or telecommunications networks, network paralysis following a virus attack, intervention by government authorities, natural disasters, water damage, earthquakes, fire, explosions, strikes and labor unrest, war, etc. 11.2 Any Failure by either Party, on one or more occasions, to invoke one or more of the provisions hereof, shall under no circumstances be interpreted as being a waiver by the interested Party of its right to invoke said provision(s) subsequently. 11.3 The Agreement cancels and replaces any or all previous agreements, whether written or oral, between the Parties and having the same purpose, and constitutes the entirety of the agreement between said Parties concerning said purpose. 
No supplement or modification to the terms and conditions hereof shall be effective as between the Parties unless it is made in writing and signed by their duly authorized representatives. 11.4 In the event that one or more of the provisions hereof were to conflict with a current or future applicable act or legislative text, said act or legislative text shall prevail, and the Parties shall make the necessary amendments so as to comply with said act or legislative text. All other provisions shall remain effective. Similarly, invalidity of a provision of the Agreement, for any reason whatsoever, shall not cause the Agreement as a whole to be invalid. 11.5 LANGUAGE The Agreement is drafted in both French and English and both versions are deemed authentic. Article 12 - NEW VERSIONS OF THE AGREEMENT 12.1 Any person is authorized to duplicate and distribute copies of this Agreement. 12.2 So as to ensure coherence, the wording of this Agreement is protected and may only be modified by the authors of the License, who reserve the right to periodically publish updates or new versions of the Agreement, each with a separate number. These subsequent versions may address new issues encountered by Free Software. 12.3 Any Software distributed under a given version of the Agreement may only be subsequently distributed under the same version of the Agreement or a subsequent version, subject to the provisions of Article 5.3.4. Article 13 - GOVERNING LAW AND JURISDICTION 13.1 The Agreement is governed by French law. The Parties agree to endeavor to seek an amicable solution to any disagreements or disputes that may arise during the performance of the Agreement. 13.2 Failing an amicable solution within two (2) months as from their occurrence, and unless emergency proceedings are necessary, the disagreements or disputes shall be referred to the Paris Courts having jurisdiction, by the more diligent Party. Version 2.0 dated 2005-05-21. 
bpp-popgen-2.1.0/INSTALL.txt000644 000000 000000 00000000700 12147656633 015420 0ustar00rootroot000000 000000 This software needs cmake >= 2.6 to build. After installing cmake, run it with the following command: cmake -DCMAKE_INSTALL_PREFIX=[where to install, for instance /usr/local or $HOME/.local] . If available, you can also use ccmake instead of cmake for a more user-friendly interface. Then compile and install the software with make install You may also consider installing and using the software checkinstall for easier system administration. bpp-popgen-2.1.0/AUTHORS.txt000644 000000 000000 00000002471 12147656633 015446 0ustar00rootroot000000 000000 Eric Bazin Sylvain Gaillard Sylvain Glémin Khalid Belkhir Contributed code to Bio++ was enabled thanks to the following institutions and resources: 2002 - 2006 Laboratoire GPIA - UMR CNRS 5171 Université Montpellier 2 (Eric Bazin, Khalid Belkhir, Guillaume Deuchst, Julien Dutheil, Sylvain Gaillard, Nicolas Galtier, Sylvain Glémin) 2005 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Vincent Ranwez, Céline Scornavacca) 2006 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Khalid Belkhir, Nicolas Galtier, Sylvain Glémin) 2006 - 2007 ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil) 2007 - 2010 Bioinformatics Research Center, University of Aarhus (Julien Dutheil). Funded by European research Area on Plant Genomics (ERA-PG) ARelatives. 
2010 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil) 2007 - Genetics and Horticulture UMR INRA 1259 Angers-Nantes INRA Center (Sylvain Gaillard) 2008 - 2009 Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau) 2009 - 2010 Berkeley University (Bastien Boussau) 2010 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau) 2008 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Laurent Guéguen) bpp-popgen-2.1.0/debian/copyright000644 000000 000000 00000005626 12147656633 016742 0ustar00rootroot000000 000000 This package was debianized by Julien Dutheil on Thu, 07 Mar 2013 10:51:00 +0100 It was downloaded from Upstream Author: Julien Dutheil Copyright: Copyright (C) 2013 Bio++ Development Team License: This package is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this package; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL'. The Debian packaging is (C) 2013, Julien Dutheil and is licensed under the GPL, see above. The provided software is distributed under the CeCILL license: This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. The complete text of the license may be found here: http://www.cecill.info/licences/Licence_CeCILL_V2-en.html bpp-popgen-2.1.0/debian/changelog000644 000000 000000 00000003214 12147656633 016650 0ustar00rootroot000000 000000 libbpp-popgen (2.1.0-1) unstable; urgency=low * Bug fixed and warnings removed. -- Julien Dutheil Thu, 07 Mar 2013 10:51:00 +0100 libbpp-popgen (2.0.3-1) unstable; urgency=low * Recompilation because of dependencies. -- Julien Dutheil Thu, 09 Feb 2012 16:30:00 +0100 libbpp-popgen (2.0.2-1) unstable; urgency=low * RFP: Bio++ -- The Bio++ bioinformatics libraries. (Closes: #616373). * Packages are now non-native. * New Fst computation from Hudson, Slatkin and Maddison (Genetics, 1992). 
-- Julien Dutheil Thu, 09 Jun 2011 11:00:00 +0100 libbpp-popgen (2.0.1) unstable; urgency=low * Fixed copyright issue in package. -- Julien Dutheil Mon, 28 Feb 2011 09:00:00 +0100 libbpp-popgen (2.0.0) unstable; urgency=low * Update for version 2.0 of Bio++. Code reorganization. -- Julien Dutheil Mon, 07 Feb 2011 09:00:00 +0100 libbpp-popgen (1.5.0) unstable; urgency=low * Update for version 1.9 of Bio++. Several interface improvements and bug fixed. -- Julien Dutheil Thu, 25 Mar 2010 15:14:55 +0100 libbpp-popgen (1.4.0) unstable; urgency=low * Update for version 1.8 of Bio++. -- Julien Dutheil Wed, 10 Jun 2009 11:28:58 +0100 libbpp-popgen (1.3.1) unstable; urgency=low * Bug fix release. -- Julien Dutheil Thu, 11 Dec 2008 12:21:37 +0100 libbpp-popgen (1.3.0) unstable; urgency=low * Initial Release. -- Julien Dutheil Mon, 21 Jul 2008 15:17:26 +0200 bpp-popgen-2.1.0/debian/libbpp-popgen6.install000644 000000 000000 00000000035 12147656633 021210 0ustar00rootroot000000 000000 debian/tmp/usr/lib/lib*.so.* bpp-popgen-2.1.0/debian/prerm000755 000000 000000 00000000616 12147656633 016054 0ustar00rootroot000000 000000 #! /bin/bash # Abort if any command returns an error value set -e removeGeneric() { if [ -f $1.all ] then echo "-- Remove generic include file: $1.all" rm $1.all fi for file in "$1"/* do if [ -d "$file" ] then # Recursion: removeGeneric $file fi done } if [ "$1" = "remove" ]; then # Actualize .all files removeGeneric /usr/include/Bpp fi exit 0 bpp-popgen-2.1.0/debian/rules000755 000000 000000 00000005214 12147656633 016060 0ustar00rootroot000000 000000 #!/usr/bin/make -f # -*- makefile -*- # Sample debian/rules that uses debhelper. # This file was originally written by Joey Hess and Craig Small. # As a special exception, when this file is copied by dh-make into a # dh-make output file, you may use that output file without restriction. # This special exception was added by Craig Small in version 0.37 of dh-make. 
# 24/01/10 Modification for use with CMake by Julien Dutheil. # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 # These are used for cross-compiling and for saving the configure script # from having to guess our platform (since we know it already) DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) CFLAGS = -Wall -g ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) CFLAGS += -O0 else CFLAGS += -O2 endif # shared library versions version=`ls src/lib*.so.* | \ awk '{if (match($$0,/[0-9]+\.[0-9]+\.[0-9]+$$/)) print substr($$0,RSTART)}'` major=`ls src/lib*.so.* | \ awk '{if (match($$0,/\.so\.[0-9]+$$/)) print substr($$0,RSTART+4)}'` configure: cmake -DCMAKE_INSTALL_PREFIX=/usr . config.status: configure dh_testdir build: build-stamp build-stamp: config.status dh_testdir # Add here commands to compile the package. $(MAKE) touch $@ clean: dh_testdir dh_testroot # Add here commands to clean up after the build process. [ ! -f Makefile ] || $(MAKE) clean; [ ! -f Makefile ] || rm Makefile; [ ! -f src/Makefile ] || rm src/Makefile; rm -f config.sub config.guess rm -f build-stamp rm -f CMakeCache.txt rm -f *.cmake rm -f src/*.cmake #rm -f test/*.cmake rm -rf CMakeFiles rm -rf src/CMakeFiles #rm -rf test/CMakeFiles rm -rf _CPack_Packages #rm -rf Testing #rm -f DartConfiguration.tcl dh_clean install: build dh_testdir dh_testroot dh_prep dh_installdirs # Add here commands to install the package into debian/tmp $(MAKE) DESTDIR=$(CURDIR)/debian/tmp install # Build architecture-independent files here. binary-indep: build install # We have nothing to do by default. # Build architecture-dependent files here. 
binary-arch: build install dh_testdir dh_testroot dh_installchangelogs ChangeLog dh_installdocs dh_installexamples dh_install # dh_installmenu # dh_installdebconf # dh_installlogrotate # dh_installemacsen # dh_installpam # dh_installmime # dh_installinit # dh_installcron # dh_installinfo dh_installman dh_link dh_strip dh_compress dh_fixperms # dh_perl # dh_python dh_makeshlibs dh_installdeb dh_shlibdeps dh_gencontrol dh_md5sums dh_builddeb binary: binary-indep binary-arch .PHONY: build clean binary-indep binary-arch binary install bpp-popgen-2.1.0/debian/docs000644 000000 000000 00000000000 12147656633 015637 0ustar00rootroot000000 000000 bpp-popgen-2.1.0/debian/postrm000755 000000 000000 00000001524 12147656633 016252 0ustar00rootroot000000 000000 #! /bin/bash # Abort if any command returns an error value set -e createGeneric() { echo "-- Creating generic include file: $1.all" #Make sure we run into subdirectories first: dirs=() for file in "$1"/* do if [ -d "$file" ] then # Recursion: dirs+=( "$file" ) fi done for dir in ${dirs[@]} do createGeneric $dir done #Now list all files, including newly created .all files: if [ -f $1.all ] then rm $1.all fi dir=`basename $1` for file in "$1"/* do if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ) then file=`basename $file` echo "#include \"$dir/$file\"" >> $1.all fi done; } if [ "$1" = "remove" ]; then # Automatically added by dh_makeshlibs ldconfig # Actualize .all files createGeneric /usr/include/Bpp fi exit 0 bpp-popgen-2.1.0/debian/compat000644 000000 000000 00000000002 12147656633 016174 0ustar00rootroot000000 000000 5 bpp-popgen-2.1.0/debian/postinst000755 000000 000000 00000001443 12147656633 016611 0ustar00rootroot000000 000000 #! 
/bin/bash # Abort if any command returns an error value set -e createGeneric() { echo "-- Creating generic include file: $1.all" #Make sure we run into subdirectories first: dirs=() for file in "$1"/* do if [ -d "$file" ] then # Recursion: dirs+=( "$file" ) fi done for dir in ${dirs[@]} do createGeneric $dir done #Now list all files, including newly created .all files: if [ -f $1.all ] then rm $1.all fi dir=`basename $1` for file in "$1"/* do if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ) then file=`basename $file` echo "#include \"$dir/$file\"" >> $1.all fi done; } if [ "$1" = "configure" ]; then # Actualize .all files createGeneric /usr/include/Bpp fi exit 0 bpp-popgen-2.1.0/debian/control000644 000000 000000 00000001514 12147656633 016402 0ustar00rootroot000000 000000 Source: libbpp-popgen Section: libs Priority: optional Maintainer: Loic Dachary Uploaders: Julien Dutheil Build-Depends: debhelper (>= 5), cmake (>= 2.6), libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0) Standards-Version: 3.9.1 Package: libbpp-popgen-dev Section: libdevel Architecture: any Depends: libbpp-popgen6 (= ${binary:Version}), ${misc:Depends}, libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0) Description: Bio++ Population Genetics library development files. Contains the Bio++ classes for population genetics. Package: libbpp-popgen6 Section: libs Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-core2 (>= 2.1.0), libbpp-seq9 (>= 2.1.0) Description: Bio++ Population Genetics library. Contains the Bio++ classes for population genetics. 
bpp-popgen-2.1.0/debian/source/format000644 000000 000000 00000000014 12147656633 017504 0ustar00rootroot000000 000000 3.0 (quilt) bpp-popgen-2.1.0/debian/libbpp-popgen-dev.install000644 000000 000000 00000000116 12147656633 021676 0ustar00rootroot000000 000000 debian/tmp/usr/include/* debian/tmp/usr/lib/lib*.a debian/tmp/usr/lib/lib*.so bpp-popgen-2.1.0/ChangeLog000644 000000 000000 00000005774 12147656633 015343 0ustar00rootroot000000 000000 07/03/13 -*- Version 2.1.0 -*- 06/05/13 Julien Dutheil * Fixed bug #67 * Fixed bug #8 09/02/12 -*- Version 2.0.3 -*- 09/06/11 -*- Version 2.0.2 -*- 08/06/11 Benoît Nabholz * Added Fst calculation from Hudson, Slatkin and Maddison 1992 (Genetics 132:153). 28/02/11 -*- Version 2.0.1 -*- 07/02/11 -*- Version 2.0.0 -*- 21/07/08 -*- Version 1.4.0 -*- 18/11/09 Sylvain Gaillard * Switching from pointer to reference in many methods arguments and return values. * Introducing std::auto_ptr when methods need to return pointer to new object. 16/11/09 Sylvain Gaillard * No more "using namespace" in header files. 21/07/09 Sylvain Gaillard * Removed Coord and CoordsTools classes (move to Utils/Point2D and Utils/Point2DTools) * Code update to use Point2D class instead of Coord class 23/06/09 Julien Dutheil * Code update for compatibility with seqlib. Changed pointers to refs when retrieving sequences.
19/06/09 Sylvain Gaillard * Removed all 'using namespace' statement from header files 18/06/09 Sylvain Gaillard * Fix SequenceTools::getFrequencies call in SequenceStatistics * Fix SequenceStatistics::getTransitionsTransversionsRatio 21/07/08 -*- Version 1.4.0 -*- 15/04/09 Sylvain Gaillard * Moved to UTF8 encoding * Fixed computations in Fu & Li D statistics 08/04/09 Sylvain Gaillard * Fixed calls to Seq::SymbolListTools::getCounts 31/03/09 Sylvain Gaillard * Fixed Doxygen documentation @author tags * Fixed warnings in bpp::DarwinDon::write bpp::DarwinVarSingle::write and bpp::MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey 19/01/09 Sylvain Gaillard * Fixed bug in PolymorphismSequenceContainerTools::getSelectedSequences now set the sequence count properly. 16/01/09 Sylvain Gaillard * Fixed method PolymorphismSequenceContainerTools::extractGroup which now return the extracted group and not all groups but the extracted one. 08/01/09 Sylvain Gaillard * New method sample in PolymorphismSequenceContainerTools. * Extend main page doxygen documentation. * A bit of cleaning. 07/01/09 Céline Scornavacca & Julien Dutheil * Bug fixed in clone method of PolymorphismSequenceContainer. 21/07/08 -*- Version 1.3.1 -*- 10/11/08 Sylvain Gaillard * Fixed a bug in DataSet::getGroupName(). 21/07/08 -*- Version 1.3.0 -*- 09/04/08 Sylvain Gaillard * Added DarwinDon and DarwinVarSingle output class. 04/04/08 Sylvain Gaillard * Added MultiAlleleMonolocusGenotype and MonolocusGenotypeTools class. * Added GeneMapperCsvExport input class. 01/04/08 Sylvain Gaillard * Fixed bug in SequenceStatistics::DVH (thanks to Alicia). 18/01/08 -*- Version 1.2.0 -*- 12/01/08 Julien Dutheil * Compatibility update: inclusion in namespace bpp + code update. 06/07/07 -*- Version 1.1.1 -*- 15/05/07 Julien Dutheil * Compatibility update (NumCalc). * PopGenLib does not depend anymore on PhylLib. * Coord and Locality classes code improved. 
28/08/06 -*- Version 1.1.0 -*- 21/06/06 Khalid Belkhir * Nouvelles fonctionnalités pour les permutations. * Ajout Fstat multilocus. * Construction de matrices de distances. * Correction de bugs bpp-popgen-2.1.0/Doxyfile000644 000000 000000 00000240223 12147656633 015265 0ustar00rootroot000000 000000 # Doxyfile 1.8.3.1-20130209 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or sequence of words) that should # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. PROJECT_NAME = bpp-popgen # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 2.1.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer # a quick idea about the purpose of the project. Keep the description short. 
PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or icon that is # included in the documentation. The maximum height of the logo should not # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc).
# Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = YES # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. 
The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. Note that you specify absolute paths here, but also # relative paths, which will be relative from the directory where doxygen is # started. STRIP_FROM_PATH = ./src/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = ./src/ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. 
Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 2 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding # "class=itcl::class" will allow you to use the command class in the # itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. 
OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, # and language is one of the parsers supported by doxygen: IDL, Java, # Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, # C++. For instance to make doxygen treat .inc files as Fortran files (default # is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. EXTENSION_MAPPING = # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you # can mix doxygen, HTML, and XML commands with Markdown formatting. # Disable only in case of backward compatibility issues. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented classes, # or namespaces to their corresponding documentation. Such a link can be # prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.)
but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES (the # default) will make doxygen replace the get and set methods by a property in # the documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. 
SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and # unions are shown inside the group in which they are included (e.g. using # @ingroup) instead of on a separate page (for HTML and Man pages) or # section (for LaTeX and RTF). INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and # unions with only public data fields or simple typedef fields will be shown # inline in the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO (the default), structs, classes, and unions are shown on a separate # page (for HTML and Man pages) or section (for LaTeX and RTF). INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory.
Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. SYMBOL_CACHE_SIZE = 0 # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the # same symbol appears multiple times in the code, doxygen keeps a cache of # pre-resolved symbols. If the cache is too small doxygen will become slower. # If the cache is too large, memory is wasted. The cache size is given by this # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = YES # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation.
# If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. 
If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. 
If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to # do proper type resolution of all parameters of a function it will reject a # match between the prototype and the implementation of a member function even # if there is only one candidate or it is obvious which candidate to choose # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. 
GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if section-label ... \endif # and \cond section-label ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or macro consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and macros in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). 
Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files # containing the references data. This must be a list of .bib files. The # .bib extension is automatically appended if omitted. Using this command # requires the bibtex tool to be installed. See also # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this # feature you need bibtex and perl available in the search path. Do not use # file names with spaces, bibtex cannot handle them. CITE_BIB_FILES = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members.
If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = src # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. 
Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl FILE_PATTERNS = *.h \ *.cpp # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring.
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty or if # none of the patterns match the file name, INPUT_FILTER is applied.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERNS (if any) # and it is also possible to disable source filtering for a specific pattern # using *.ext= (so without naming a filter). This option only has effect when # FILTER_SOURCE_FILES is enabled. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page (index.html). # This can be useful if you have a project on for instance GitHub and want to reuse # the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C, C++ and Fortran comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed.
REFERENCED_BY_RELATION = YES # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. 
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. Note that when using a custom header you are responsible # for the proper inclusion of any scripts and style sheets that doxygen # needs, which is dependent on the configuration options used. # It is advised to generate a default header using "doxygen -w html # header.html footer.html stylesheet.css YourConfigFile" and then modify # that header. Note that the header is subject to change so you typically # have to redo this when upgrading to a newer version of doxygen or when # changing the value of configuration settings such as GENERATE_TREEVIEW! HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. 
It can be used to # fine-tune the look of the HTML output. If left blank doxygen will # generate a default style sheet. Note that it is recommended to use # HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this # tag will in the future become obsolete. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional # user-defined cascading style sheet that is included after the standard # style sheets created by doxygen. Using this option one can overrule # certain style aspects. This is preferred over using HTML_STYLESHEET # since it does not replace the standard style sheet and is therefore more # robust against future updates. Doxygen will copy the style sheet file to # the output directory. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors.
HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. HTML_DYNAMIC_SECTIONS = YES # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of # entries shown in the various tree structured indices initially; the user # can expand and collapse entries dynamically later on. Doxygen will expand # the tree to such a level that at most the specified number of entries are # visible (unless a fully collapsed tree already exceeds this amount). # So setting the number of entries 1 will produce a full collapsed tree by # default. 0 is a special value representing an infinite number of entries # and will result in a full expanded tree by default. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. 
Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = YES # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Bio++ Population Genetics Library" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = bpp.popgen # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely # identify the documentation publisher. This should be a reverse domain-name # style string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe).
If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters # (Qt Help Project / Custom Filters). QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches. For more information please see # http://doc.trolltech.com/qthelpproject.html#filter-attributes # (Qt Help Project / Filter Attributes). QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) # at top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. Since the tabs have the same information as the # navigation tree you can set this option to YES if you already set # GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information.
# If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. # Since the tree basically has the same information as the tab index you # could consider setting DISABLE_INDEX to YES when enabling this option. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values # (range [0,1..20]) that doxygen will group on one line in the generated HTML # documentation. Note that a value of 0 will completely suppress the enum # values from appearing in the overview section. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect.
FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want formulas to look prettier in the HTML # output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and # SVG. The default value is HTML-CSS, which is slower, but has the best # compatibility. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to # the MathJax Content Delivery Network so you can quickly see the result without # installing MathJax. However, it is strongly recommended to install a local # copy of MathJax from http://www.mathjax.org before deployment. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension # names that should be enabled during MathJax rendering. MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled.
For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using Javascript. # There are two flavours of web server based search depending on the # EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for # searching and an index file used by the script. When EXTERNAL_SEARCH is # enabled the indexing and searching needs to be provided by external tools. # See the manual for details. SERVER_BASED_SEARCH = NO # When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file # which needs to be processed by an external indexer. Doxygen will invoke an # external search engine pointed to by the SEARCHENGINE_URL option to obtain # the search results. Doxygen ships with an example indexer (doxyindexer) and # search engine (doxysearch.cgi) which are based on the open source search engine # library Xapian. See the manual for configuration details. EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will returned the search results when EXTERNAL_SEARCH is enabled. # Doxygen ships with an example search engine (doxysearch) which is based on # the open source search engine library Xapian. See the manual for configuration # details. SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the # SEARCHDATA_FILE tag the name of this file can be specified. SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. 
This is # useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple # projects and redirect the results back to the right project. EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are # all added to the same external search index. Each project needs to have a # unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id # of to a relative location where the documentation can be found. # The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. 
COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = amsmath # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for # the generated latex document. The footer should contain everything after # the last chapter. If it is left blank doxygen will generate a # standard footer. Notice: only use this tag if you know what you are doing! LATEX_FOOTER = # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images # or other source files which should be copied to the LaTeX output directory. # Note that the files will be copied as-is; there are no commands or markers # available. LATEX_EXTRA_FILES = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = NO # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. 
# This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See # http://en.wikipedia.org/wiki/BibTeX for more info. LATEX_BIB_STYLE = plain #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load style sheet definitions from file. 
Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. 
XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- # If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files # that can be used to generate PDF. GENERATE_DOCBOOK = NO # The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be put in # front of it. If left blank docbook will be used as the default path. DOCBOOK_OUTPUT = docbook #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. 
GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. 
Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # pointed to by INCLUDE_PATH will be searched when a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code. 
EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros # that are alone on a line, have an all uppercase name, and do not end with a # semicolon, because these will confuse the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. For each # tag file the location of the external documentation should be added. The # format of a tag file without this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths # or URLs. Note that each tag file must have a unique name (where the name does # NOT include the path). If a tag file is not located in the directory in which # doxygen is run, you must also specify the path to the tagfile here. TAGFILES = ../bpp-core/BppCore.tag=../../bpp-core/html \ ../bpp-seq/BppSeq.tag=../../bpp-seq/html # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = BppPopGen.tag # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # If the EXTERNAL_PAGES tag is set to YES all external pages will be listed # in the related pages index. If set to NO, only the current project's # pages will be listed. 
EXTERNAL_PAGES = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = YES # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. 
DOT_NUM_THREADS = 0 # By default doxygen will use the Helvetica font for all dot files that # doxygen generates. When you want a differently looking font you can specify # the font name using DOT_FONTNAME. You need to make sure dot is able to find # the font, which can be done by putting it in a standard location or by setting # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the # directory containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the Helvetica font. # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to # set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If the UML_LOOK tag is enabled, the fields and methods are shown inside # the class node. If there are many fields or methods and many nodes the # graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS # threshold limits the number of items for each type to make the size more # managable. 
Set this to 0 for no limit. Note that the threshold may be # exceeded by 50% before the limit is enforced. UML_LIMIT_NUM_FIELDS = 10 # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. 
The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are svg, png, jpg, or gif. # If left blank png will be used. If you choose svg you need to set # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. # Note that this requires a modern browser other than Internet Explorer. # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible. Older versions of IE do not have SVG support. INTERACTIVE_SVG = NO # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the # \mscfile command). MSCFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. 
DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = YES # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. 
DOT_CLEANUP = YES bpp-popgen-2.1.0/src/CMakeLists.txt000644 000000 000000 00000006251 12147656633 017107 0ustar00rootroot000000 000000 # CMake script for Bio++ PopGen
# Author: Sylvain Gaillard
# Created: 21/08/2009

# File list
# Translation units compiled into both the static and the shared library.
SET(CPP_FILES
  Bpp/PopGen/AbstractIDataSet.cpp
  Bpp/PopGen/AbstractODataSet.cpp
  Bpp/PopGen/AnalyzedLoci.cpp
  Bpp/PopGen/AnalyzedSequences.cpp
  Bpp/PopGen/BasicAlleleInfo.cpp
  Bpp/PopGen/BiAlleleMonolocusGenotype.cpp
  Bpp/PopGen/DarwinDon.cpp
  Bpp/PopGen/DarwinVarSingle.cpp
  Bpp/PopGen/DataSet.cpp
  Bpp/PopGen/DataSetTools.cpp
  Bpp/PopGen/Date.cpp
  Bpp/PopGen/GeneMapperCsvExport.cpp
  Bpp/PopGen/Genepop.cpp
  Bpp/PopGen/GeneralExceptions.cpp
  Bpp/PopGen/Genetix.cpp
  Bpp/PopGen/Group.cpp
  Bpp/PopGen/Individual.cpp
  Bpp/PopGen/LocusInfo.cpp
  Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp
  Bpp/PopGen/MonolocusGenotypeTools.cpp
  Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp
  Bpp/PopGen/MultiSeqIndividual.cpp
  Bpp/PopGen/MultilocusGenotype.cpp
  Bpp/PopGen/MultilocusGenotypeStatistics.cpp
  Bpp/PopGen/PolymorphismMultiGContainer.cpp
  Bpp/PopGen/PolymorphismMultiGContainerTools.cpp
  Bpp/PopGen/PolymorphismSequenceContainer.cpp
  Bpp/PopGen/PolymorphismSequenceContainerTools.cpp
  Bpp/PopGen/PopgenlibIO.cpp
  Bpp/PopGen/SequenceStatistics.cpp
  )

# Public headers. No command in this file reads H_FILES; the headers are
# installed by the INSTALL(DIRECTORY ...) rule further down.
SET(H_FILES
  Bpp/PopGen/AbstractIDataSet.h
  Bpp/PopGen/AbstractODataSet.h
  Bpp/PopGen/AlleleInfo.h
  Bpp/PopGen/AnalyzedLoci.h
  Bpp/PopGen/AnalyzedSequences.h
  Bpp/PopGen/BasicAlleleInfo.h
  Bpp/PopGen/BiAlleleMonolocusGenotype.h
  Bpp/PopGen/DarwinDon.h
  Bpp/PopGen/DarwinVarSingle.h
  Bpp/PopGen/DataSet.h
  Bpp/PopGen/DataSetTools.h
  Bpp/PopGen/Date.h
  Bpp/PopGen/GeneMapperCsvExport.h
  Bpp/PopGen/Genepop.h
  Bpp/PopGen/GeneralExceptions.h
  Bpp/PopGen/Genetix.h
  Bpp/PopGen/Group.h
  Bpp/PopGen/IDataSet.h
  Bpp/PopGen/IODataSet.h
  Bpp/PopGen/Individual.h
  Bpp/PopGen/Locality.h
  Bpp/PopGen/LocusInfo.h
  Bpp/PopGen/MonoAlleleMonolocusGenotype.h
  Bpp/PopGen/MonolocusGenotype.h
  Bpp/PopGen/MonolocusGenotypeTools.h
  Bpp/PopGen/MultiAlleleMonolocusGenotype.h
  Bpp/PopGen/MultiSeqIndividual.h
  Bpp/PopGen/MultilocusGenotype.h
  Bpp/PopGen/MultilocusGenotypeStatistics.h
  Bpp/PopGen/ODataSet.h
  Bpp/PopGen/PolymorphismMultiGContainer.h
  Bpp/PopGen/PolymorphismMultiGContainerTools.h
  Bpp/PopGen/PolymorphismSequenceContainer.h
  Bpp/PopGen/PolymorphismSequenceContainerTools.h
  Bpp/PopGen/PopgenlibIO.h
  Bpp/PopGen/SequenceStatistics.h
  )

# Build the static lib
# Both library targets share the output name "bpp-popgen"; they need distinct
# target names because CMake target names must be unique.
ADD_LIBRARY(bpppopgen-static STATIC ${CPP_FILES})
SET_TARGET_PROPERTIES(bpppopgen-static
  PROPERTIES OUTPUT_NAME bpp-popgen
  CLEAN_DIRECT_OUTPUT 1
  )
TARGET_LINK_LIBRARIES(bpppopgen-static ${LIBS})

# Build the shared lib
# VERSION / SOVERSION come from the libtool-like numbers computed in the
# top-level CMakeLists.txt.
ADD_LIBRARY(bpppopgen-shared SHARED ${CPP_FILES})
SET_TARGET_PROPERTIES(bpppopgen-shared
  PROPERTIES OUTPUT_NAME bpp-popgen
  CLEAN_DIRECT_OUTPUT 1
  VERSION ${BPPPOPGEN_VERSION}
  SOVERSION ${BPPPOPGEN_VERSION_MAJOR}
  )
TARGET_LINK_LIBRARIES(bpppopgen-shared ${LIBS})

# Install libs
INSTALL(TARGETS bpppopgen-static DESTINATION lib${LIB_SUFFIX})
INSTALL(TARGETS bpppopgen-shared DESTINATION lib${LIB_SUFFIX})

# Install headers
INSTALL(DIRECTORY Bpp/ DESTINATION include/Bpp FILES_MATCHING PATTERN "*.h")

# Generate generic include files (.all)
# The script must run on the directory the headers were just installed into,
# i.e. <DESTDIR><CMAKE_INSTALL_PREFIX>/include/Bpp. CMAKE_PREFIX_PATH is a
# search-path *list* (the top-level script only prepends the install prefix to
# it), so it cannot be used to build that path. DESTDIR and
# CMAKE_INSTALL_PREFIX are escaped so that they are evaluated at install time.
INSTALL(CODE "EXECUTE_PROCESS(COMMAND \"${CMAKE_SOURCE_DIR}/genIncludes.sh\" \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/include/Bpp\")")
bpp-popgen-2.1.0/src/Bpp/PopGen/DataSet.cpp000644 000000 000000 00000152011 12147656633 020305 0ustar00rootroot000000 000000 // // File DataSet.cpp // Author : Sylvain Gaillard // Khalid Belkhir // Last modification : November 10, 2008 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DataSet.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ DataSet::DataSet() : analyzedLoci_(0), analyzedSequences_(0), localities_(vector*>()), groups_(vector()) {} /******************************************************************************/ DataSet::DataSet(const DataSet& ds) : analyzedLoci_(0), analyzedSequences_(0), localities_(vector*>()), groups_(vector()) { if (ds.analyzedLoci_ != 0) analyzedLoci_ = new AnalyzedLoci(*(ds.analyzedLoci_)); if (ds.analyzedSequences_ != 0) analyzedSequences_ = new AnalyzedSequences(*(ds.analyzedSequences_)); if (ds.localities_.size() != 0) for (size_t i = 0; i < ds.localities_.size(); i++) { localities_.push_back(new Locality(*(ds.localities_[i]))); } if (ds.groups_.size() != 0) for (size_t i = 0; i < ds.groups_.size(); i++) { groups_.push_back(new Group(*(ds.groups_[i]))); } } /******************************************************************************/ DataSet& DataSet::operator=(const DataSet& ds) { if (ds.analyzedLoci_ != 0) analyzedLoci_ = new AnalyzedLoci(*(ds.analyzedLoci_)); if (ds.analyzedSequences_ != 0) analyzedSequences_ = new AnalyzedSequences(*(ds.analyzedSequences_)); if (ds.localities_.size() != 0) for (size_t i = 0; i < ds.localities_.size(); i++) { localities_.push_back(new Locality(*(ds.localities_[i]))); } if (ds.groups_.size() != 0) for (size_t i = 0; i < ds.groups_.size(); i++) { groups_.push_back(new Group(*(ds.groups_[i]))); } return *this; } // ** Class destructor: *******************************************************/ DataSet::~DataSet() { if (getNumberOfGroups() > 0) for (size_t i = 0; i < getNumberOfGroups(); i++) { delete groups_[i]; } if (analyzedLoci_ != 0) delete analyzedLoci_; if (getNumberOfLocalities() > 0) for (size_t i = 0; i < getNumberOfLocalities(); i++) { delete localities_[i]; } if (analyzedSequences_ != 0) delete analyzedSequences_; } // ** Other methodes: 
*********************************************************/ // Dealing with Localities --------------------------------- void DataSet::addLocality(Locality& locality) throw (BadIdentifierException) { for (size_t i = 0; i < localities_.size(); i++) { if (localities_[i]->getName() == locality.getName()) throw BadIdentifierException("DataSet::addLocality: locality name already in use.", locality.getName()); } localities_.push_back(new Locality(locality)); } /******************************************************************************/ size_t DataSet::getLocalityPosition(const std::string& name) const throw (LocalityNotFoundException) { for (size_t i = 0; i < localities_.size(); i++) { if (localities_[i]->getName() == name) return i; } throw LocalityNotFoundException("DataSet::getLocalityPosition: Locality not found.", name); } /******************************************************************************/ const Locality& DataSet::getLocalityAtPosition(size_t locality_position) const throw (IndexOutOfBoundsException) { if (locality_position >= localities_.size()) throw IndexOutOfBoundsException("DataSet::getLocalityAtPosition: locality_position out of bounds.", locality_position, 0, localities_.size()); return *(localities_[locality_position]); } /******************************************************************************/ const Locality& DataSet::getLocalityByName(const std::string& name) const throw (LocalityNotFoundException) { try { return getLocalityAtPosition(getLocalityPosition(name)); } catch (LocalityNotFoundException& lnfe) { throw LocalityNotFoundException("DataSet::getLocalityByName: Locality not found.", name); } } /******************************************************************************/ void DataSet::deleteLocalityAtPosition(size_t locality_position) throw (IndexOutOfBoundsException) { if (locality_position >= localities_.size()) throw IndexOutOfBoundsException("DataSet::deleteLocalityAtPosition: locality_position out of bounds.", 
locality_position, 0, localities_.size()); delete localities_[locality_position]; localities_.erase(localities_.begin() + locality_position); } /******************************************************************************/ void DataSet::deleteLocalityByName(const std::string& name) throw (LocalityNotFoundException) { try { deleteLocalityAtPosition(getLocalityPosition(name)); } catch (LocalityNotFoundException& lnfe) { throw LocalityNotFoundException("DataSet::deleteLocalityByName: Locality not found.", name); } } /******************************************************************************/ size_t DataSet::getNumberOfLocalities() const { return localities_.size(); } /******************************************************************************/ bool DataSet::hasLocality() const { return getNumberOfLocalities() > 0; } /******************************************************************************/ // Dealing with groups ------------------------------------- void DataSet::addGroup(const Group& group) throw (BadIdentifierException) { for (size_t i = 0; i < groups_.size(); i++) { if (group.getGroupId() == groups_[i]->getGroupId()) throw BadIdentifierException("DataSet::addGroup: group id already in use.", group.getGroupId()); } groups_.push_back(new Group(group)); } /******************************************************************************/ void DataSet::addEmptyGroup(size_t group_id) throw (BadIdentifierException) { for (size_t i = 0; i < groups_.size(); i++) { if (group_id == groups_[i]->getGroupId()) throw BadIdentifierException("DataSet::addEmptyGroup: group_id already in use.", group_id); } groups_.push_back(new Group(group_id)); } /******************************************************************************/ const Group& DataSet::getGroupById(size_t group_id) const throw (GroupNotFoundException) { for (size_t i = 0; i < groups_.size(); i++) { if (group_id == groups_[i]->getGroupId()) return *(groups_[i]); } throw 
GroupNotFoundException("DataSet::getGroupById: group_id not found.", group_id); } /******************************************************************************/ string DataSet::getGroupName(size_t group_id) const throw (GroupNotFoundException) { string name; name = getGroupById(group_id).getGroupName(); if (!name.empty() ) return name; else return TextTools::toString(group_id); throw GroupNotFoundException("DataSet::getGroupName: group_id not found.", group_id); } /******************************************************************************/ void DataSet::setGroupName(size_t group_id, const std::string& group_name) const throw (GroupNotFoundException) { for (size_t i = 0; i < groups_.size(); i++) { if (group_id == groups_[i]->getGroupId()) { groups_[i]->setGroupName(group_name); return; } } throw GroupNotFoundException("DataSet::setGroupName: group_id not found.", group_id); } /******************************************************************************/ size_t DataSet::getGroupPosition(size_t group_id) const throw (GroupNotFoundException) { for (size_t i = 0; i < groups_.size(); i++) { if (group_id == groups_[i]->getGroupId()) return i; } throw GroupNotFoundException("DataSet::getGroupPosition: group_id not found.", group_id); } /******************************************************************************/ const Group& DataSet::getGroupAtPosition(size_t group_position) const throw (IndexOutOfBoundsException) { if (group_position >= groups_.size()) throw IndexOutOfBoundsException("DataSet::getGroup.", group_position, 0, groups_.size()); return *(groups_[group_position]); } /******************************************************************************/ void DataSet::deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoundsException) { if (group_position >= groups_.size()) throw IndexOutOfBoundsException("DataSet::deleteGroup.", group_position, 0, groups_.size()); delete groups_[group_position]; groups_.erase(groups_.begin() + group_position); } 
/******************************************************************************/ size_t DataSet::getNumberOfGroups() const { return groups_.size(); } /******************************************************************************/ void DataSet::mergeTwoGroups(size_t source_id, size_t target_id) throw (GroupNotFoundException) { // Test the existance of the two groups. try { getGroupById(source_id); } catch (GroupNotFoundException& e) { throw GroupNotFoundException("DataSet::mergeTwoGroups: source_id not found.", source_id); } try { getGroupById(target_id); } catch (GroupNotFoundException& e) { throw GroupNotFoundException("DataSet::mergeTwoGroups: target_id not found.", target_id); } // Emptie the source into the target size_t source_pos = getGroupPosition(source_id); size_t target_pos = getGroupPosition(target_id); for (size_t i = 0; i < groups_[source_pos]->getNumberOfIndividuals(); i++) { groups_[target_pos]->addIndividual(groups_[source_pos]->getIndividualAtPosition(i)); } deleteGroupAtPosition(source_pos); } /******************************************************************************/ void DataSet::mergeGroups(std::vector& group_ids) throw (GroupNotFoundException) { // Test if all group id exists in the DataSet for (size_t i = 0; i < group_ids.size(); i++) { try { getGroupById(group_ids[i]); } catch (GroupNotFoundException& e) { throw GroupNotFoundException("DataSet::mergeGroups: group not found.", group_ids[i]); } } // Sort the group id sort(group_ids.begin(), group_ids.end()); // Merge all the groups in the first size_t pos_first = getGroupPosition(group_ids[0]); for (size_t i = 1; i < group_ids.size(); i++) { size_t pos_current = getGroupPosition(group_ids[i]); for (size_t j = 0; j < getGroupAtPosition(pos_current).getNumberOfIndividuals(); j++) { groups_[pos_first]->addIndividual(getGroupAtPosition(pos_current).getIndividualAtPosition(j)); } deleteGroupAtPosition(pos_current); } } 
/******************************************************************************/ void DataSet::splitGroup(size_t group_id, std::vector individuals_selection) throw (Exception) { size_t source_pos; try { source_pos = getGroupPosition(group_id); } catch (GroupNotFoundException& gnfe) { throw GroupNotFoundException("DataSet::splitGroup: group_id not found.", gnfe.getIdentifier()); } size_t new_group_id = 0; for (size_t i = 0; i < groups_.size(); i++) { if (groups_[i]->getGroupId() > new_group_id) new_group_id = groups_[i]->getGroupId(); } new_group_id++; Group new_group(new_group_id); for (size_t i = 0; i < individuals_selection.size(); i++) { if (individuals_selection[i] >= groups_[source_pos]->getNumberOfIndividuals()) throw IndexOutOfBoundsException("DataSet::splitGroup: individuals_selection excedes the number of individual in the group.", individuals_selection[i], 0, groups_[source_pos]->getNumberOfIndividuals()); } for (size_t i = 0; i < individuals_selection.size(); i++) { new_group.addIndividual(*groups_[source_pos]->removeIndividualAtPosition(individuals_selection[i])); groups_[source_pos]->deleteIndividualAtPosition(individuals_selection[i]); } addGroup(new_group); } /******************************************************************************/ // Dealing with individuals ------------------------------- void DataSet::addIndividualToGroup(size_t group, const Individual& individual) throw (Exception) { if (group >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::addIndividualToGroup: group out of bounds.", group, 0, getNumberOfGroups()); try { groups_[group]->addIndividual(individual); if (individual.hasSequences()) setAlphabet(individual.getSequenceAlphabet()); } catch (BadIdentifierException& bie) { throw BadIdentifierException("DataSet::addIndividualToGroup: individual's id already in use in this group.", bie.getIdentifier()); } } /******************************************************************************/ void 
DataSet::addEmptyIndividualToGroup(size_t group, const std::string& individual_id) throw (Exception) { if (group >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::addEmptyIndividual: group out of bounds.", group, 0, getNumberOfGroups()); try { groups_[group]->addEmptyIndividual(individual_id); } catch (BadIdentifierException& bie) { throw BadIdentifierException("DataSet::addEmptyIndividual: individual_id already in use.", bie.getIdentifier()); } } /******************************************************************************/ size_t DataSet::getNumberOfIndividualsInGroup(size_t group_position) const throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getNumberOfIndividualsInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); return groups_[group_position]->getNumberOfIndividuals(); } /******************************************************************************/ size_t DataSet::getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualPosition(individual_id); } catch (IndividualNotFoundException infe) { throw IndividualNotFoundException("DataSet::getIndividualPositionFromGroup: individual_id not found.", infe.getIdentifier()); } } /******************************************************************************/ const Individual* DataSet::getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualAtPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return 
&groups_[group_position]->getIndividualAtPosition(individual_position); } catch (IndexOutOfBoundsException ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualAtPositionFromGroup: individual_position out of bouds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ const Individual* DataSet::getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualByIdFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return &groups_[group_position]->getIndividualById(individual_id); } catch (IndividualNotFoundException infe) { throw IndividualNotFoundException("DataSet::getIndividualByIdFromGroup: individual_id not found.", infe.getIdentifier()); } } /******************************************************************************/ void DataSet::deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::deleteIndividualAtPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->deleteIndividualAtPosition(individual_position); } catch (IndexOutOfBoundsException ioobe) { throw IndexOutOfBoundsException("DataSet::deleteIndividualAtPositionFromGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ void DataSet::deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::deleteIndividualByIdFromGroup: 
group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->deleteIndividualById(individual_id); } catch (IndividualNotFoundException infe) { throw IndividualNotFoundException("DataSet::deleteIndividualByIdFromGroup: individual_id not found.", infe.getIdentifier()); } } /******************************************************************************/ void DataSet::setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualSexInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualSexAtPosition(individual_position, sex); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setIndividualSexInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ unsigned short DataSet::getIndividualSexInGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualSexInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualSexAtPosition(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualSexInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ void DataSet::setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date) throw (IndexOutOfBoundsException) { if (group_position >= 
getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualDateInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualDateAtPosition(individual_position, date); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setIndividualDateInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ const Date* DataSet::getIndividualDateInGroup(size_t group_position, size_t individual_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualDateInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return &groups_[group_position]->getIndividualDateAtPosition(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualDateInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualDateInGroup: individual has no date."); } } /******************************************************************************/ void DataSet::setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D& coord) throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualCoordInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualCoordAtPosition(individual_position, coord); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setIndividualCoordInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], 
ioobe.getBounds()[1]); } } /******************************************************************************/ const Point2D* DataSet::getIndividualCoordInGroup(size_t group_position, size_t individual_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualCoordInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return &groups_[group_position]->getIndividualCoordAtPosition(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualCoordAtPosition: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualCoordInGroup: individual has no coordinate."); } } /******************************************************************************/ void DataSet::setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualLocalityInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualLocalityAtPosition(individual_position, &getLocalityByName(locality_name)); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setIndividualLocalityInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (LocalityNotFoundException& lnfe) { throw LocalityNotFoundException("DataSet::setIndividualLocalityInGroup: locality_name not found.", lnfe.getIdentifier()); } } /******************************************************************************/ const Locality* DataSet::getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const throw 
(Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualLocalityInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return &groups_[group_position]->getIndividualLocalityAtPosition(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualLocalityInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualLocalityInGroup: individual has no locality."); } } /******************************************************************************/ void DataSet::addIndividualSequenceInGroup(size_t group_position, size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::addIndividualSequenceInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->addIndividualSequenceAtPosition(individual_position, sequence_position, sequence); setAlphabet(sequence.getAlphabet()); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::addIndividualSequenceInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (AlphabetMismatchException& ame) { throw AlphabetMismatchException("DataSet::addIndividualSequenceInGroup: sequence's alphabet doesn't match.", ame.getAlphabets()[0], ame.getAlphabets()[1]); } catch (BadIdentifierException& bie) { throw BadIdentifierException("DataSet::addIndividualSequenceInGroup: sequence's name already in use.", bie.getIdentifier()); } catch (BadIntegerException& bie) { throw BadIntegerException("DataSet::addIndividualSequenceInGroup: sequence_position already in use.", bie.getBadInteger()); } } 
/******************************************************************************/ const Sequence& DataSet::getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualSequenceByNameInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualSequenceByName(individual_position, sequence_name); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualSequenceByNameInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualSequenceByNameInGroup: individual has no sequences."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("DataSet::getIndividualSequenceByNameInGroup: sequence_name not found.", snfe.getSequenceId()); } } /******************************************************************************/ const Sequence& DataSet::getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualSequenceAtPosition(individual_position, sequence_position); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if 
(string(ioobe.what()).find("sequence_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: sequence_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualSequenceAtPositionInGroup: individual has no sequences."); } } /******************************************************************************/ void DataSet::deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceByNameInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->deleteIndividualSequenceByName(individual_position, sequence_name); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceByNameInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::deleteIndividualSequenceByNameInGroup: individual has no sequences."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("DataSet::deleteIndividualSequenceByNameInGroup: sequence_name not found.", snfe.getSequenceId()); } } /******************************************************************************/ void DataSet::deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { 
groups_[group_position]->deleteIndividualSequenceAtPosition(individual_position, sequence_position); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if (string(ioobe.what()).find("sequence_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: sequence_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::deleteIndividualSequenceAtPositionInGroup: individual has no sequences."); } } /******************************************************************************/ std::vector DataSet::getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualSequencesNamesInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualSequencesNames(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualSequencesNamesInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualSequencesNamesInGroup: individual has no sequences."); } } /******************************************************************************/ size_t DataSet::getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception) { if (group_position >= getNumberOfGroups()) 
throw IndexOutOfBoundsException("DataSet::getIndividualSequencePositionInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualSequencePosition(individual_position, sequence_name); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualSequencePositionInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualSequencePositionInGroup: individual has no sequences."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("DataSet::getIndividualSequencePositionInGroup: sequence_name not found.", snfe.getSequenceId()); } } /******************************************************************************/ size_t DataSet::getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualNumberOfSequencesInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return groups_[group_position]->getIndividualNumberOfSequences(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getIndividualNumberOfSequencesInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualNumberOfSequencesInGroup: individual has no sequences."); } } /******************************************************************************/ void DataSet::setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype) throw (Exception) { if (group_position >= getNumberOfGroups()) throw 
IndexOutOfBoundsException("DataSet::setIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualGenotype(individual_position, genotype); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setIndividualGenotypeInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ void DataSet::initIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::initIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->initIndividualGenotype(individual_position, getNumberOfLoci()); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::initIndividualGenotypeInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (BadIntegerException& bie) { throw BadIntegerException("DataSet::initIndividualGenotypeInGroup: number of loci must be > 0.", bie.getBadInteger()); } catch (NullPointerException) { throw NullPointerException("DataSet::initIndividualGenotypeInGroup: analyzed_loci is NULL."); } catch (Exception) { throw Exception("DataSet::initIndividualGenotypeInGroup: individual already has a genotype."); } } /******************************************************************************/ void DataSet::deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::deleteIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { 
groups_[group_position]->deleteIndividualGenotype(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::deleteIndividualGenotypeInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ void DataSet::setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualMonolocusGenotype(individual_position, locus_position, monogen); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if (string(ioobe.what()).find("locus_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::setIndividualMonolocusGenotypeInGroup: individual has no genotype."); } } /******************************************************************************/ void DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector allele_keys) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: 
group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { groups_[group_position]->setIndividualMonolocusGenotypeByAlleleKey(individual_position, locus_position, allele_keys); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if (string(ioobe.what()).find("locus_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: individual has no genotype."); } catch (Exception) { throw Exception("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: no key in allele_keys."); } } /******************************************************************************/ void DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector allele_id) throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); const LocusInfo& locus_info = getLocusInfoAtPosition(locus_position); try { groups_[group_position]->setIndividualMonolocusGenotypeByAlleleId(individual_position, locus_position, allele_id, locus_info); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: individual_position out of 
bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if (string(ioobe.what()).find("locus_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: individual has no genotype."); } catch (AlleleNotFoundException& anfe) { throw AlleleNotFoundException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: id not found.", anfe.getIdentifier()); } } /******************************************************************************/ const MonolocusGenotype* DataSet::getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const throw (Exception) { if (group_position >= getNumberOfGroups()) throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups()); try { return &groups_[group_position]->getIndividualMonolocusGenotype(individual_position, locus_position); } catch (IndexOutOfBoundsException& ioobe) { if (string(ioobe.what()).find("individual_position") < string(ioobe.what()).size()) throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); // if (string(ioobe.what()).find("locus_position") < string(ioobe.what()).size()) else throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException) { throw NullPointerException("DataSet::getIndividualMonolocusGenotypeInGroup: individual has no genotype."); } } 
/******************************************************************************/ // Dealing with AnalyzedSequences -------------------------- void DataSet::setAlphabet(const Alphabet* alpha) { if (analyzedSequences_ == 0) analyzedSequences_ = new AnalyzedSequences(); analyzedSequences_->setAlphabet(alpha); } /******************************************************************************/ void DataSet::setAlphabet(const std::string& alpha_type) { if (analyzedSequences_ == 0) analyzedSequences_ = new AnalyzedSequences(); analyzedSequences_->setAlphabet(alpha_type); } /******************************************************************************/ const Alphabet* DataSet::getAlphabet() const throw (NullPointerException) { if (analyzedSequences_ != 0) return analyzedSequences_->getAlphabet(); throw NullPointerException("DataSet::getAlphabet: no sequence data."); } /******************************************************************************/ std::string DataSet::getAlphabetType() const throw (NullPointerException) { if (analyzedSequences_ != 0) return analyzedSequences_->getAlphabetType(); throw NullPointerException("DataSet::getAlphabetType: no sequence data."); } /******************************************************************************/ // Dealing with AnalyzedLoci ------------------------------- void DataSet::setAnalyzedLoci(const AnalyzedLoci& analyzedLoci) throw (Exception) { if (analyzedLoci_ != 0) { try { deleteAnalyzedLoci(); } catch (Exception& e) { throw Exception ("DataSet::setAnalyzedLoci: at least one individual has a genotype of the actual AnalyzedLoci."); } } analyzedLoci_ = new AnalyzedLoci(analyzedLoci); } /******************************************************************************/ void DataSet::initAnalyzedLoci(size_t number_of_loci) throw (Exception) { if (analyzedLoci_ != 0) throw Exception("DataSet::initAnalyzedLoci: analyzedLoci_ already initialyzed."); analyzedLoci_ = new AnalyzedLoci(number_of_loci); } 
/******************************************************************************/ const AnalyzedLoci* DataSet::getAnalyzedLoci() const throw (NullPointerException) { if (analyzedLoci_ != 0) return analyzedLoci_; throw NullPointerException("DataSet::getAnalyzedLoci: no loci initialized."); } /******************************************************************************/ void DataSet::deleteAnalyzedLoci() { if (analyzedLoci_ != 0) delete analyzedLoci_; } /******************************************************************************/ void DataSet::setLocusInfo(size_t locus_position, const LocusInfo& locus) throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::setLocusInfo: there's no AnalyzedLoci to setup."); try { analyzedLoci_->setLocusInfo(locus_position, locus); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::setLocusInfo: locus_position out of bounds.", locus_position, 0, analyzedLoci_->getNumberOfLoci()); } } /******************************************************************************/ const LocusInfo& DataSet::getLocusInfoByName(const std::string& locus_name) const throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::getLocusInfoByName: there's no AnalyzedLoci."); try { return analyzedLoci_->getLocusInfoByName(locus_name); } catch (LocusNotFoundException& lnfe) { throw LocusNotFoundException("DataSet::getLocusInfoByName: locus_name not found", locus_name); } } /******************************************************************************/ const LocusInfo& DataSet::getLocusInfoAtPosition(size_t locus_position) const throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::getLocusInfoAtPosition: there's no AnalyzedLoci."); try { return analyzedLoci_->getLocusInfoAtPosition(locus_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getLocusInfoAtPosition: locus_position out of bounds.", 
locus_position, ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (NullPointerException& npe) { throw NullPointerException("DataSet::getLocusInfoAtPosition: no locus defined here"); } } /******************************************************************************/ void DataSet::addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele) throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::addAlleleInfoByLocusName: there's no AnalyzedLoci."); try { analyzedLoci_->addAlleleInfoByLocusName(locus_name, allele); } catch (LocusNotFoundException& lnfe) { throw LocusNotFoundException("DataSet::addAlleleInfoByLocusName: locus_name not found.", lnfe.getIdentifier()); } catch (BadIdentifierException& bie) { throw BadIdentifierException("DataSet::addAlleleInfoByLocusName: allele's id already in use.", bie.getIdentifier()); } } /******************************************************************************/ void DataSet::addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::addAlleleInfoByLocusPosition: there's no AnalyzedLoci."); try { analyzedLoci_->addAlleleInfoByLocusPosition(locus_position, allele); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::addAlleleInfoByLocusPosition: locus_position out of bounds.", locus_position, ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (BadIdentifierException& bie) { throw BadIdentifierException("DataSet::addAlleleInfoByLocusPosition: allele'e id already in use.", bie.getIdentifier()); } } /******************************************************************************/ size_t DataSet::getNumberOfLoci() const throw (NullPointerException) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::getNumberOfLoci: there's no AnalyzedLoci."); return analyzedLoci_->getNumberOfLoci(); } 
/******************************************************************************/ size_t DataSet::getPloidyByLocusName(const std::string& locus_name) const throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::getPloidyByLocusName: there's no AnalyzedLoci."); try { return analyzedLoci_->getPloidyByLocusName(locus_name); } catch (LocusNotFoundException& lnfe) { throw LocusNotFoundException("DataSet::getPloidyByLocusName: locus_name not found.", lnfe.getIdentifier()); } } /******************************************************************************/ size_t DataSet::getPloidyByLocusPosition(size_t locus_position) const throw (Exception) { if (analyzedLoci_ == 0) throw NullPointerException("DataSet::getPloidyByLocusPosition: there's no AnalyzedLoci."); try { return analyzedLoci_->getPloidyByLocusPosition(locus_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("DataSet::getPloidyByLocusPosition: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ // Container extraction ----------------------------------- PolymorphismMultiGContainer* DataSet::getPolymorphismMultiGContainer() const { PolymorphismMultiGContainer* pmgc = new PolymorphismMultiGContainer(); for (size_t i = 0; i < getNumberOfGroups(); i++) { // nommer les groupes khalid string name = groups_[i]->getGroupName(); pmgc->addGroupName(i, name); for (size_t j = 0; j < getNumberOfIndividualsInGroup(i); j++) { const Individual* tmp_ind = getIndividualAtPositionFromGroup(i, j); if (tmp_ind->hasGenotype()) { const MultilocusGenotype& tmp_mg = tmp_ind->getGenotype(); pmgc->addMultilocusGenotype(tmp_mg, i); } } } return pmgc; } /******************************************************************************/ PolymorphismMultiGContainer* DataSet::getPolymorphismMultiGContainer(const std::map >& selection) const throw (Exception) 
{ PolymorphismMultiGContainer* pmgc = new PolymorphismMultiGContainer(); for (map >::const_iterator it = selection.begin(); it != selection.end(); it++) { size_t i; try { i = getGroupPosition(it->first); } catch (GroupNotFoundException& gnfe) { throw gnfe; } string name = groups_[i]->getGroupName(); pmgc->addGroupName(i, name); for (size_t j = 0; j < it->second.size(); j++) { const Individual* tmp_ind = 0; try { tmp_ind = getIndividualAtPositionFromGroup(i, j); } catch (IndexOutOfBoundsException& ioobe) { throw ioobe; } if (tmp_ind->hasGenotype()) { const MultilocusGenotype& tmp_mg = tmp_ind->getGenotype(); pmgc->addMultilocusGenotype(tmp_mg, i); } } } return pmgc; } /******************************************************************************/ PolymorphismSequenceContainer* DataSet::getPolymorphismSequenceContainer(const std::map >& selection, size_t sequence_position) const throw (Exception) { PolymorphismSequenceContainer* psc = new PolymorphismSequenceContainer(getAlphabet()); for (map >::const_iterator it = selection.begin(); it != selection.end(); it++) { size_t i; try { i = getGroupPosition(it->first); } catch (GroupNotFoundException& gnfe) { delete psc; throw gnfe; } for (size_t j = 0; j < it->second.size(); j++) { const Individual* tmp_ind = 0; try { tmp_ind = getIndividualAtPositionFromGroup(i, j); } catch (IndexOutOfBoundsException& ioobe) { delete psc; throw ioobe; } if (tmp_ind->hasSequenceAtPosition(sequence_position)) { const Sequence* tmp_seq = &tmp_ind->getSequenceAtPosition(sequence_position); psc->addSequence(*tmp_seq, 1, false); psc->setGroupId((const string) (tmp_seq->getName()), it->first); } } } return psc; } /******************************************************************************/ // General tests ------------------------------------------ bool DataSet::hasSequenceData() const { return analyzedSequences_ != 0; } /******************************************************************************/ bool DataSet::hasAlleleicData() const { 
return analyzedLoci_ != 0; } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/DataSetTools.cpp000644 000000 000000 00000006024 12147656633 021330 0ustar00rootroot000000 000000 // // File DataSetTools.cpp // Author : Sylvain Gaillard // Last modification : Wednesday August 04 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DataSetTools.h" using namespace bpp; using namespace std; std::auto_ptr DataSetTools::buildDataSet(const OrderedSequenceContainer& osc) throw (Exception) { auto_ptr d_s(new DataSet()); d_s->addEmptyGroup(0); for (size_t i = 0; i < osc.getNumberOfSequences(); i++) { d_s->addEmptyIndividualToGroup(0, string("Individual_") + TextTools::toString(i + 1)); try { d_s->addIndividualSequenceInGroup(0, i, 0, osc.getSequence(i)); } catch (Exception& e) { throw e; } } return d_s; } std::auto_ptr DataSetTools::buildDataSet(const PolymorphismSequenceContainer& psc) throw (Exception) { auto_ptr d_s(new DataSet()); set grp_ids = psc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { d_s->addEmptyGroup(*it); } size_t ind_count = 0; for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { for (size_t j = 0; j < psc.getSequenceCount(i); j++) { d_s->addEmptyIndividualToGroup(psc.getGroupId(i), string("Individual_") + TextTools::toString(ind_count++)); try { d_s->addIndividualSequenceInGroup(psc.getGroupId(i), i, 0, psc.getSequence(i)); } catch (Exception& e) { throw e; } } } return d_s; } bpp-popgen-2.1.0/src/Bpp/PopGen/LocusInfo.cpp000644 000000 000000 00000010564 12147656633 020667 0ustar00rootroot000000 000000 // // File LocusInfo.cpp // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/

// NOTE(review): the header name of the next directive, and the template
// arguments of vector / dynamic_cast further down, were lost when this text
// was extracted (everything between '<' and '>' is missing). The code is
// kept exactly as found; restore those parts from the upstream file.
#include
#include "LocusInfo.h"
#include "GeneralExceptions.h"

using namespace bpp;
using namespace std;

// Ploidy constants.
unsigned int LocusInfo::HAPLODIPLOID = 0;
unsigned int LocusInfo::HAPLOID = 1;
unsigned int LocusInfo::DIPLOID = 2;
unsigned int LocusInfo::UNKNOWN = 9999;

// ** Class constructor: *******************************************************/

// Builds a locus with the given name and ploidy and an empty allele list.
LocusInfo::LocusInfo(const std::string& name, const unsigned int ploidy) :
  name_(name),
  ploidy_(ploidy),
  alleles_(vector())
{}

// Copy constructor: every allele of locus_info is cloned, so the two objects
// do not share AlleleInfo instances.
LocusInfo::LocusInfo(const LocusInfo& locus_info) :
  name_(locus_info.getName()),
  ploidy_(locus_info.getPloidy()),
  alleles_(vector(locus_info.getNumberOfAlleles()))
{
  for (unsigned int i = 0; i < locus_info.getNumberOfAlleles(); i++)
  {
    alleles_[i] = dynamic_cast(locus_info.getAlleleInfoByKey(i).clone());
  }
}

// ** Class destructor: *******************************************************/

// Deletes every stored allele (same work as clear()).
LocusInfo::~LocusInfo()
{
  for (unsigned int i = 0; i < alleles_.size(); i++)
  {
    delete alleles_[i];
  }
  alleles_.clear();
}

// ** Other methods: **********************************************************/

// AlleleInfos

// Appends a clone of allele to the list.
// Throws BadIdentifierException if an allele with the same id is present.
void LocusInfo::addAlleleInfo(const AlleleInfo& allele) throw (BadIdentifierException)
{
  // Check if the allele id is not already in use
  for (unsigned int i = 0; i < alleles_.size(); i++)
  {
    if (alleles_[i]->getId() == allele.getId())
      throw BadIdentifierException("LocusInfo::addAlleleInfo: Id already in use.", allele.getId());
  }
  alleles_.push_back(allele.clone());
}

// Returns the first stored allele whose id equals id (linear search).
// Throws AlleleNotFoundException if there is none.
const AlleleInfo& LocusInfo::getAlleleInfoById(const std::string& id) const throw (AlleleNotFoundException)
{
  for (unsigned int i = 0; i < alleles_.size(); i++)
  {
    if (alleles_[i]->getId() == id)
      return *(alleles_[i]);
  }
  throw AlleleNotFoundException("LocusInfo::getAlleleInfoById: AlleleInfo id unknown.", id);
}

// Returns the allele stored at index key.
// Throws IndexOutOfBoundsException if key is not below the number of alleles.
const AlleleInfo& LocusInfo::getAlleleInfoByKey(size_t key) const throw (IndexOutOfBoundsException)
{
  if (key >= alleles_.size())
    throw IndexOutOfBoundsException("LocusInfo::getAlleleInfoByKey: key out of bounds.", key, 0, alleles_.size());
  return *(alleles_[key]);
}

// Returns the index of the first stored allele whose id equals id.
// Throws AlleleNotFoundException if there is none.
unsigned int LocusInfo::getAlleleInfoKey(const std::string& id) const throw (AlleleNotFoundException)
{
  for (unsigned int i = 0; i < alleles_.size(); i++)
  {
    if (alleles_[i]->getId() == id)
      return i;
  }
  throw AlleleNotFoundException("LocusInfo::getAlleleInfoKey: AlleleInfo id not found.", id);
}

// Returns the number of stored alleles.
size_t LocusInfo::getNumberOfAlleles() const
{
  return alleles_.size();
}

// Deletes every stored allele and empties the list.
void LocusInfo::clear()
{
  for (unsigned int i = 0; i < alleles_.size(); i++)
  {
    delete alleles_[i];
  }
  alleles_.clear();
}
bpp-popgen-2.1.0/src/Bpp/PopGen/Genepop.h000644 000000 000000 00000005530 12147656633 020025 0ustar00rootroot000000 000000
//
// File Genepop.h
// Author : Sylvain Gaillard
// Last modification : Tuesday September 21 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)
This software is a computer program whose purpose is to provide classes for population genetics analysis.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _GENEPOP_H_
#define _GENEPOP_H_

// NOTE(review): the header names of the next four directives were lost when
// this text was extracted (everything between '<' and '>' is missing).
// They are kept exactly as found; restore them from the upstream file.
#include
#include
#include
#include

// From local Pop
#include "AbstractIDataSet.h"
#include "BasicAlleleInfo.h"

namespace bpp
{
/**
 * @brief The Genepop input format for popgenlib.
 *
 * Only the declarations are given here; the reading methods are implemented
 * outside this header.
 *
 * @author Sylvain Gaillard
 */
class Genepop : public AbstractIDataSet
{
public:
  // Constructor and destructor
  Genepop();
  ~Genepop();

public:
  /**
   * @name The IDataSet interface.
   * @{
   */
  // The first two overloads receive the DataSet to work on; the last two
  // return a DataSet pointer. The source is a stream or a path.
  void read(std::istream& is, DataSet& data_set) throw (Exception);
  void read(const std::string& path, DataSet& data_set) throw (Exception);
  DataSet* read(std::istream& is) throw (Exception);
  DataSet* read(const std::string& path) throw (Exception);
  /**
   * @}
   */

  /**
   * @name The IOFormat interface
   * @{
   */
  /** @return The fixed format name, "Genepop ver 3.4". */
  const std::string getFormatName() const
  {
    return "Genepop ver 3.4";
  }
  /** @return A fixed one-sentence description of the format. */
  const std::string getFormatDescription() const
  {
    return "Genepop is a software for populations genetic for DOS operating system";
  }
  /**
   * @}
   */
};
} // end of namespace bpp;

#endif // _GENEPOP_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/BiAlleleMonolocusGenotype.cpp000644 000000 000000 00000007755 12147656633 024061 0ustar00rootroot000000 000000
//
// File BiAlleleMonolocusGenotype.cpp
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)
This software is a computer program whose purpose is to provide classes for population genetics analysis.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software.
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "BiAlleleMonolocusGenotype.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype( size_t first_allele_index, size_t second_allele_index) : allele_index_(vector(2)) { allele_index_[0] = first_allele_index; allele_index_[1] = second_allele_index; } BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype(std::vector allele_index) throw (BadSizeException) : allele_index_(vector(2)) { if (allele_index.size() != 2) throw BadSizeException("BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype: allele_index must contain two values.", allele_index.size(), 2); allele_index_[0] = allele_index[0]; allele_index_[1] = allele_index[1]; } BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype(const BiAlleleMonolocusGenotype& bmg) : allele_index_(vector(2)) { for (size_t i = 0; i < 2; i++) { allele_index_[i] = bmg.getAlleleIndex()[i]; } } // ** Class destructor: ********************************************************/ BiAlleleMonolocusGenotype::~BiAlleleMonolocusGenotype() { allele_index_.clear(); } // ** Other methodes: **********************************************************/ BiAlleleMonolocusGenotype& BiAlleleMonolocusGenotype::operator=(const BiAlleleMonolocusGenotype& bmg) { for (size_t i = 0; i < 2; i++) { allele_index_.push_back(bmg.getAlleleIndex()[i]); } return *this; } bool BiAlleleMonolocusGenotype::operator==(const BiAlleleMonolocusGenotype& bmg) const { return (allele_index_[0] == bmg.getAlleleIndex()[0] && allele_index_[1] == bmg.getAlleleIndex()[1]) || (allele_index_[0] == bmg.getAlleleIndex()[1] && allele_index_[1] == bmg.getAlleleIndex()[0]); } size_t BiAlleleMonolocusGenotype::getFirstAlleleIndex() const { return allele_index_[0]; } size_t BiAlleleMonolocusGenotype::getSecondAlleleIndex() const { return allele_index_[1]; } bool BiAlleleMonolocusGenotype::isHomozygous() const { return allele_index_[0] == allele_index_[1]; } 
std::vector BiAlleleMonolocusGenotype::getAlleleIndex() const { return allele_index_; } BiAlleleMonolocusGenotype* BiAlleleMonolocusGenotype::clone() const { return new BiAlleleMonolocusGenotype(*this); } bpp-popgen-2.1.0/src/Bpp/PopGen/SequenceStatistics.cpp000644 000000 000000 00000155344 12147656633 022617 0ustar00rootroot000000 000000 // // File SequenceStatistics.cpp // Authors: Eric Bazin // Sylvain Gailard // Khalid Belkhir // Benoit Nabholz // Created on: Wed Aug 04 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

// NOTE(review): every '<...>' sequence was lost when this text was extracted.
// The thirteen bare #include directives below have no header name, and the
// map / vector / static_cast uses further down have no template arguments.
// The code is kept exactly as found; restore those parts from the upstream
// file before compiling.

#include "SequenceStatistics.h" // class's header file
#include "PolymorphismSequenceContainerTools.h"
#include "PolymorphismSequenceContainer.h"

// From the STL:
#include
#include
#include
#include

using namespace std;

// From SeqLib:
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace bpp;

// ******************************************************************************
// Basic statistics
// ******************************************************************************

// In the functions of this section, gapflag chooses the site iterator:
// CompleteSiteContainerIterator when true, SimpleSiteContainerIterator
// otherwise. The iterator is heap-allocated and deleted before returning.

// Counts the sites for which SiteTools::isConstant(site, ignoreUnknown) is false.
size_t SequenceStatistics::polymorphicSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown)
{
  size_t S = 0;
  const Site* site = 0;
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    if (!SiteTools::isConstant(*site, ignoreUnknown))
    {
      S++;
    }
  }
  delete si;
  return S;
}

// Counts the sites for which SiteTools::isParsimonyInformativeSite is true.
size_t SequenceStatistics::parsimonyInformativeSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  size_t S = 0;
  const Site* site = 0;
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    if (SiteTools::isParsimonyInformativeSite(*site))
    {
      S++;
    }
  }
  delete si;
  return S;
}

// Sums getSingletonNumber_(site) over the visited sites.
size_t SequenceStatistics::countSingleton(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  size_t nus = 0;
  const Site* site = 0;
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    nus += getSingletonNumber_(*site);
  }
  delete si;
  return nus;
}

// Counts the sites for which SiteTools::isTriplet is true.
// The counter is an int, converted to size_t on return.
size_t SequenceStatistics::tripletNumber(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  int S = 0;
  const Site* site = 0;
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    if (SiteTools::isTriplet(*site))
    {
      S++;
    }
  }
  delete si;
  return S;
}

// Sums getMutationNumber_(site) over the visited sites.
size_t SequenceStatistics::totNumberMutations(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  size_t tnm = 0;
  const Site* site = 0;
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    tnm += getMutationNumber_(*site);
  }
  delete si;
  return tnm;
}

// Walks the ingroup and outgroup containers site by site, in parallel, and
// sums getDerivedSingletonNumber_ over the positions where both sites are
// complete. Throws Exception if the two containers differ in number of sites.
// The outgroup iterator is advanced without its own hasMoreSites() test;
// the size check above is what keeps the two in step.
size_t SequenceStatistics::totMutationsExternalBranchs(
  const PolymorphismSequenceContainer& ing,
  const PolymorphismSequenceContainer& outg) throw (Exception)
{
  if (ing.getNumberOfSites() != outg.getNumberOfSites())
    throw Exception("ing and outg must have the same size");
  size_t nmuts = 0;
  const Site* site_in = 0;
  const Site* site_out = 0;
  ConstSiteIterator* si = 0;
  ConstSiteIterator* so = 0;
  si = new SimpleSiteContainerIterator(ing);
  so = new SimpleSiteContainerIterator(outg);
  while (si->hasMoreSites())
  {
    site_in = si->nextSite();
    site_out = so->nextSite();
    // use fully resolved sites
    if (SiteTools::isComplete(*site_in) && SiteTools::isComplete(*site_out))
      nmuts += getDerivedSingletonNumber_(*site_in, *site_out); // singletons that are not in outgroup
  }
  delete si;
  delete so;
  return nmuts;
}

// Sums SiteTools::heterozygosity(site) over the visited sites.
double SequenceStatistics::heterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  ConstSiteIterator* si = 0;
  const Site* site = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  double S = 0;
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    S += SiteTools::heterozygosity(*site);
  }
  delete si;
  return S;
}

// Sums the square of SiteTools::heterozygosity(site) over the visited sites.
double SequenceStatistics::squaredHeterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  ConstSiteIterator* si = 0;
  const Site* site = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  double S = 0;
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    double h = SiteTools::heterozygosity(*site);
    S += h * h;
  }
  delete si;
  return S;
}

// ******************************************************************************
// GC statistics
// ******************************************************************************

// Returns (freq C + freq G) / (freq A + freq C + freq G + freq T), using the
// frequencies filled in by SequenceContainerTools::getFrequencies.
// There is no guard against a zero denominator.
double SequenceStatistics::gcContent(const PolymorphismSequenceContainer& psc)
{
  map freqs;
  SequenceContainerTools::getFrequencies(psc, freqs);
  const Alphabet* alpha = psc.getAlphabet();
  return (freqs[alpha->charToInt("C")] + freqs[alpha->charToInt("G")]) / (freqs[alpha->charToInt("A")] + freqs[alpha->charToInt("C")] + freqs[alpha->charToInt("G")] + freqs[alpha->charToInt("T")]);
}

// Returns a two-element vector: [0] accumulates the number of sequences once
// per retained site, [1] accumulates freqGC * number of sequences (truncated
// by the cast) for the same sites. A site is retained when it is not constant
// and its GC content is strictly between 0 and 1.
// Unlike the functions above, the iterator used when gapflag is false is
// NoGapSiteContainerIterator.
std::vector SequenceStatistics::gcPolymorphism(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  size_t nbMut = 0;
  size_t nbGC = 0;
  const size_t nbSeq = psc.getNumberOfSequences();
  vector vect(2);
  const Site* site = 0;
  ConstSiteIterator* si = 0;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new NoGapSiteContainerIterator(psc);
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    if (!SiteTools::isConstant(*site))
    {
      long double freqGC = SymbolListTools::getGCContent(*site);
      /*
       * Sylvain Gaillard 15/03/2010: really unclear ...
       * freqGC is always in [0,1] then why testing it ?
       * why casting double into size_t ?
       * is that method used by someone ?
       */
      if (freqGC > 0 && freqGC < 1)
      {
        nbMut += static_cast(nbSeq);
        long double adGC = freqGC * nbSeq;
        nbGC += static_cast(adGC);
      }
    }
  }
  vect[0] = nbMut;
  vect[1] = nbGC;
  delete si;
  return vect;
}

// ******************************************************************************
// Diversity statistics
// ******************************************************************************

// Returns S / a1, where S is polymorphicSiteNumber(psc, gapflag,
// ignoreUnknown) and a1 is the "a1" entry of getUsefullValues_(n) for
// n = number of sequences.
double SequenceStatistics::watterson75(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown)
{
  double ThetaW;
  size_t n = psc.getNumberOfSequences();
  size_t S = polymorphicSiteNumber(psc, gapflag, ignoreUnknown);
  map values = getUsefullValues_(n);
  ThetaW = (double) S / values["a1"];
  return ThetaW;
}

// NOTE(review): this definition runs past the end of the excerpt; only its
// beginning is visible, so it is reproduced as found and not documented.
double SequenceStatistics::tajima83(const PolymorphismSequenceContainer& psc, bool gapflag)
{
  size_t alphabet_size = (psc.getAlphabet())->getSize();
  const Site* site = 0;
  ConstSiteIterator* si = 0;
  double value2 = 0.;
  if (gapflag)
    si = new CompleteSiteContainerIterator(psc);
  else
    si = new SimpleSiteContainerIterator(psc);
  while (si->hasMoreSites())
  {
    site = si->nextSite();
    if (!SiteTools::isConstant(*site))
    {
      double value = 0.;
      map count;
      SymbolListTools::getCounts(*site, count);
      map tmp_k;
      size_t tmp_n = 0;
      for (map::iterator it = count.begin(); it != count.end(); it++)
      {
        if (it->first >= 0 && it->first < static_cast(alphabet_size))
        {
          tmp_k[it->first] = it->second * (it->second - 1);
          tmp_n += it->second;
        }
      }
      if (tmp_n == 0 || tmp_n == 1)
        continue;
      for (map::iterator it = tmp_k.begin(); it != tmp_k.end(); it++)
      {
        value += static_cast(it->second) / static_cast(tmp_n * (tmp_n - 1));
      }
      value2 += 1.
- value; } } delete si; return value2; } double SequenceStatistics::FayWu2000(const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites) { if (psc.getNumberOfSites() != ancestralSites.size()) throw Exception("SequenceStatistics::FayWu2000: ancestralSites and psc don't have the same size!!!'" ); const Sequence& tmps = psc.getSequence(0); size_t alphabet_size = (psc.getAlphabet())->getSize(); double value = 0.; for (size_t i = 0; i < psc.getNumberOfSites(); i++) { const Site& site = psc.getSite(i); string ancB = ancestralSites.getChar(i); int ancV = ancestralSites.getValue(i); if (!SiteTools::isConstant(site) || tmps.getChar(i) != ancB) { if (ancV < 0) continue; map count; SymbolListTools::getCounts(site, count); map tmp_k; size_t tmp_n = 0; for (map::iterator it = count.begin(); it != count.end(); it++) { if (it->first >= 0 && it->first < static_cast(alphabet_size)) { /* if derived allele */ if (it->first != ancV) { tmp_k[it->first] = 2 * it->second * it->second; } tmp_n += it->second; } } if (tmp_n == 0 || tmp_n == 1) continue; for (map::iterator it = tmp_k.begin(); it != tmp_k.end(); it++) { value += static_cast(it->second) / static_cast(tmp_n * (tmp_n - 1)); } } } return value; } size_t SequenceStatistics::DVK(const PolymorphismSequenceContainer& psc, bool gapflag) { /* * Sylvain Gaillard 17/03/2010: * This implementation uses unneeded SequenceContainer recopy and works on * string. It needs to be improved. 
*/ PolymorphismSequenceContainer* sc = 0; if (gapflag) sc = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc); else sc = new PolymorphismSequenceContainer(psc); // int K = 0; vector pscvector; pscvector.push_back(sc->toString(0)); // K++; for (size_t i = 1; i < sc->getNumberOfSequences(); i++) { bool uniq = true; string query = sc->toString(i); for (vector::iterator it = pscvector.begin(); it != pscvector.end(); it++) { if (query.compare(*it) == 0) { uniq = false; break; } } if (uniq) { // K++; pscvector.push_back(query); } } delete sc; // return K; return pscvector.size(); } double SequenceStatistics::DVH(const PolymorphismSequenceContainer& psc, bool gapflag) { /* * Sylvain Gaillard 17/03/2010: * This implementation uses unneeded SequenceContainer recopy and works on * string. It needs to be improved. */ PolymorphismSequenceContainer* sc = 0; if (gapflag) sc = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc); else sc = new PolymorphismSequenceContainer(psc); double H = 0.; size_t nbSeq; vector pscvector; vector effvector; pscvector.push_back(sc->toString(0)); effvector.push_back(sc->getSequenceCount(0)); nbSeq = sc->getSequenceCount(0); for (size_t i = 1; i < sc->getNumberOfSequences(); i++) { nbSeq += sc->getSequenceCount(i); bool uniq = true; string query = sc->toString(i); for (size_t j = 0; j < pscvector.size(); j++) { if (query.compare(pscvector[j]) == 0) { effvector[j] += sc->getSequenceCount(i); uniq = false; break; } } if (uniq) { pscvector.push_back(query); effvector.push_back(sc->getSequenceCount(i)); } } for (size_t i = 0; i < effvector.size(); i++) { H -= (static_cast(effvector[i]) / static_cast(nbSeq)) * ( static_cast(effvector[i]) / static_cast(nbSeq)); } H += 1.; delete sc; return H; } size_t SequenceStatistics::getNumberOfTransitions(const PolymorphismSequenceContainer& psc) { size_t nbT = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = 
si->nextSite(); // if (SiteTools::isConstant(*site) || SiteTools::isTriplet(*site)) continue; if (SiteTools::getNumberOfDistinctCharacters(*site) != 2) continue; vector seq = site->getContent(); int state1 = seq[0]; int state2 = seq[0]; for (size_t i = 1; i < seq.size(); i++) { if (state1 != seq[i]) { state2 = seq[i]; break; } } if (((state1 == 0 && state2 == 2) || (state1 == 2 && state2 == 0)) || ((state1 == 1 && state2 == 3) || (state1 == 3 && state2 == 1))) { nbT++; } } delete si; return nbT; } size_t SequenceStatistics::getNumberOfTransversions(const PolymorphismSequenceContainer& psc) { size_t nbTv = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); // if (SiteTools::isConstant(*site) || SiteTools::isTriplet(*site)) continue; if (SiteTools::getNumberOfDistinctCharacters(*site) != 2) continue; vector seq = site->getContent(); int state1 = seq[0]; int state2 = seq[0]; for (size_t i = 1; i < seq.size(); i++) { if (state1 != seq[i]) { state2 = seq[i]; break; } } if (!(((state1 == 0 && state2 == 2) || (state1 == 2 && state2 == 0)) || ((state1 == 1 && state2 == 3) || (state1 == 3 && state2 == 1)))) { nbTv++; } } delete si; return nbTv; } double SequenceStatistics::getTransitionsTransversionsRatio(const PolymorphismSequenceContainer& psc) throw (Exception) { // return (double) getNumberOfTransitions(psc)/getNumberOfTransversions(psc); size_t nbT = 0; size_t nbTv = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; vector< int > state(2); while (si->hasMoreSites()) { map count; site = si->nextSite(); SymbolListTools::getCounts(*site, count); if (count.size() != 2) continue; int i = 0; for (map::iterator it = count.begin(); it != count.end(); it++) { state[i] = it->first; i++; } if (((state[0] == 0 && state[1] == 2) || (state[0] == 2 && state[1] == 0)) || ((state[0] == 1 && state[1] == 3) || (state[0] == 3 && state[1] == 1))) { nbT++; // 
transitions } else { nbTv++; // transversion } } delete si; if (nbTv == 0) throw ZeroDivisionException("SequenceStatistics::getTransitionsTransversionsRatio."); return static_cast(nbT) / static_cast(nbTv); } // ****************************************************************************** // Synonymous and non-synonymous polymorphism // ****************************************************************************** size_t SequenceStatistics::stopCodonSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag) { /* * Sylvain Gaillard 17/03/2010 * What if the Alphabet is not a codon alphabet? */ ConstSiteIterator* si = 0; if (gapflag) si = new NoGapSiteContainerIterator(psc); else si = new SimpleSiteContainerIterator(psc); size_t S = 0; const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); if (CodonSiteTools::hasStop(*site)) S++; } delete si; return S; } size_t SequenceStatistics::monoSitePolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, bool stopflag, bool gapflag) { ConstSiteIterator* si = 0; if (stopflag) si = new CompleteSiteContainerIterator(psc); else { if (gapflag) si = new NoGapSiteContainerIterator(psc); else si = new SimpleSiteContainerIterator(psc); } size_t S = 0; const Site* site; while (si->hasMoreSites()) { site = si->nextSite(); if (CodonSiteTools::isMonoSitePolymorphic(*site)) S++; } delete si; return S; } size_t SequenceStatistics::synonymousPolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) { ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); size_t S = 0; const Site* site; while (si->hasMoreSites()) { site = si->nextSite(); if (CodonSiteTools::isSynonymousPolymorphic(*site, gc)) S++; } delete si; return S; } double SequenceStatistics::watterson75Synonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) { double ThetaW = 0.; size_t n = psc.getNumberOfSequences(); size_t S = synonymousSubstitutionsNumber(psc, gc); map values = 
getUsefullValues_(n); ThetaW = static_cast(S) / values["a1"]; return ThetaW; } double SequenceStatistics::watterson75NonSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc) { double ThetaW; size_t n = psc.getNumberOfSequences(); size_t S = nonSynonymousSubstitutionsNumber(psc, gc); map values = getUsefullValues_(n); ThetaW = static_cast(S) / values["a1"]; return ThetaW; } double SequenceStatistics::piSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange) { double S = 0.; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); S += CodonSiteTools::piSynonymous(*site, gc, minchange); } delete si; return S; } double SequenceStatistics::piNonSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange) { double S = 0.; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); S += CodonSiteTools::piNonSynonymous(*site, gc, minchange); } delete si; return S; } double SequenceStatistics::meanSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) { double S = 0.; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); S += CodonSiteTools::meanNumberOfSynonymousPositions(*site, gc, ratio); } delete si; return S; } double SequenceStatistics::meanNonSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio) { double S = 0.; int n = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); n = n + 3; S += CodonSiteTools::meanNumberOfSynonymousPositions(*site, gc, ratio); } delete si; return static_cast(n - S); } size_t SequenceStatistics::synonymousSubstitutionsNumber(const 
PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) { size_t St = 0, Sns = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); St += CodonSiteTools::numberOfSubsitutions(*site, freqmin); Sns += CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin); } delete si; return St - Sns; } size_t SequenceStatistics::nonSynonymousSubstitutionsNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin) { size_t Sns = 0; ConstSiteIterator* si = new CompleteSiteContainerIterator(psc); const Site* site = 0; while (si->hasMoreSites()) { site = si->nextSite(); Sns += CodonSiteTools::numberOfNonSynonymousSubstitutions(*site, gc, freqmin); } delete si; return Sns; } vector SequenceStatistics::fixedDifferences(const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, const GeneticCode& gc) { ConstSiteIterator* siIn = new CompleteSiteContainerIterator(pscin); ConstSiteIterator* siOut = new CompleteSiteContainerIterator(pscout); ConstSiteIterator* siCons = new CompleteSiteContainerIterator(psccons); const Site* siteIn = 0; const Site* siteOut = 0; const Site* siteCons = 0; size_t NfixS = 0; size_t NfixA = 0; while (siIn->hasMoreSites()) { siteIn = siIn->nextSite(); siteOut = siOut->nextSite(); siteCons = siCons->nextSite(); vector v = CodonSiteTools::fixedDifferences(*siteIn, *siteOut, siteCons->getValue(0), siteCons->getValue(1), gc); NfixS += v[0]; NfixA += v[1]; } vector v(2); v[0] = NfixS; v[1] = NfixA; delete siIn; delete siOut; delete siCons; return v; } vector SequenceStatistics::MKtable(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin) { PolymorphismSequenceContainer psctot(ingroup); for (size_t i = 0; i < outgroup.getNumberOfSequences(); i++) { 
psctot.addSequence(outgroup.getSequence(i)); psctot.setAsOutgroupMember(i + ingroup.getNumberOfSequences()); } const PolymorphismSequenceContainer* psccomplet = PolymorphismSequenceContainerTools::getCompleteSites(psctot); const PolymorphismSequenceContainer* pscin = PolymorphismSequenceContainerTools::extractIngroup(*psccomplet); const PolymorphismSequenceContainer* pscout = PolymorphismSequenceContainerTools::extractOutgroup(*psccomplet); const Sequence* consensusIn = SiteContainerTools::getConsensus(*pscin, "consensusIn"); const Sequence* consensusOut = SiteContainerTools::getConsensus(*pscout, "consensusOut"); PolymorphismSequenceContainer* consensus = new PolymorphismSequenceContainer(ingroup.getAlphabet()); consensus->addSequence(*consensusIn); consensus->addSequence(*consensusOut); vector u = SequenceStatistics::fixedDifferences(*pscin, *pscout, *consensus, gc); vector v(4); v[0] = SequenceStatistics::nonSynonymousSubstitutionsNumber(*pscin, gc, freqmin); v[1] = SequenceStatistics::synonymousSubstitutionsNumber(*pscin, gc, freqmin); v[2] = u[1]; v[3] = u[0]; delete consensus; if (psccomplet) { delete psccomplet; } if (pscin) { delete pscin; } if (pscout) { delete pscout; } if (consensusIn) { delete consensusIn; } if (consensusOut) { delete consensusOut; } return v; } double SequenceStatistics::neutralityIndex(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin) { vector v = SequenceStatistics::MKtable(ingroup, outgroup, gc, freqmin); if (v[1] != 0 && v[2] != 0) return static_cast(v[0] * v[3]) / static_cast(v[1] * v[2]); else return -1; } // ****************************************************************************** // Statistical tests // ****************************************************************************** double SequenceStatistics::tajimaDSS(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) { double S = 
static_cast(polymorphicSiteNumber(psc, gapflag)); if (!S) throw ZeroDivisionException("S should not be null"); double tajima = tajima83(psc, gapflag); double watterson = watterson75(psc, gapflag); size_t n = psc.getNumberOfSequences(); map values = getUsefullValues_(n); // if (S == 0) // cout << "ARG S == 0" << endl; return (tajima - watterson) / sqrt((values["e1"] * S) + (values["e2"] * S * (S - 1))); } double SequenceStatistics::tajimaDTNM(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException) { double eta = static_cast(totNumberMutations(psc, gapflag)); if (!eta) throw ZeroDivisionException("eta should not be null"); double tajima = tajima83(psc, gapflag); size_t n = psc.getNumberOfSequences(); map values = getUsefullValues_(n); double eta_a1 = static_cast(eta) / values["a1"]; return (tajima - eta_a1) / sqrt((values["e1"] * eta) + (values["e2"] * eta * (eta - 1))); } double SequenceStatistics::fuliD(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) { size_t n = ingroup.getNumberOfSequences(); map values = getUsefullValues_(n); double vD = getVD_(n, values["a1"], values["a2"], values["cn"]); double uD = getUD_(values["a1"], vD); double eta = static_cast(totNumberMutations(ingroup)); if (eta == 0.) 
throw ZeroDivisionException("eta should not be null"); double etae = 0.; if (original) etae = static_cast(countSingleton(outgroup)); else etae = static_cast(totMutationsExternalBranchs(ingroup, outgroup)); // added by Khalid 13/07/2005 return (eta - (values["a1"] * etae)) / sqrt((uD * eta) + (vD * eta * eta)); } double SequenceStatistics::fuliDstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) { size_t n = group.getNumberOfSequences(); double nn = static_cast(n); double _n = nn / (nn - 1.); map values = getUsefullValues_(n); double vDs = getVDstar_(n, values["a1"], values["a2"], values["dn"]); double uDs = getUDstar_(n, values["a1"], vDs); double eta = static_cast(totNumberMutations(group)); if (eta == 0.) throw ZeroDivisionException("eta should not be null"); double etas = static_cast(countSingleton(group)); // Fu & Li 1993 return ((_n * eta) - (values["a1"] * etas)) / sqrt(uDs * eta + vDs * eta * eta); // Simonsen et al. 1995 /* return ((eta / values["a1"]) - (etas * ((n - 1) / n))) / sqrt(uDs * eta + vDs * eta * eta); */ } double SequenceStatistics::fuliF(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException) { size_t n = ingroup.getNumberOfSequences(); double nn = static_cast(n); map values = getUsefullValues_(n); double pi = tajima83(ingroup, true); double vF = (values["cn"] + values["b2"] - 2. / (nn - 1.)) / (pow(values["a1"], 2) + values["a2"]); double uF = ((1. + values["b1"] - (4. * ((nn + 1.) / ((nn - 1.) * (nn - 1.)))) * (values["a1n"] - (2. * nn) / (nn + 1.))) / values["a1"]) - vF; double eta = static_cast(totNumberMutations(ingroup)); if (eta == 0.) 
throw ZeroDivisionException("eta should not be null"); double etae = 0.; if (original) etae = static_cast(countSingleton(outgroup)); else etae = static_cast(totMutationsExternalBranchs(ingroup, outgroup)); // added by Khalid 13/07/2005 return (pi - etae) / sqrt(uF * eta + vF * eta * eta); } double SequenceStatistics::fuliFstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException) { double n = static_cast(group.getNumberOfSequences()); map values = getUsefullValues_(group.getNumberOfSequences()); double pi = tajima83(group, true); // Fu & Li 1993 // double vFs = (values["dn"] + values["b2"] - (2. / (nn - 1.)) * (4. * values["a2"] - 6. + 8. / nn)) / (pow(values["a1"], 2) + values["a2"]); // double uFs = (((nn / (nn - 1.)) + values["b1"] - (4. / (nn * (nn - 1.))) + 2. * ((nn + 1.) / (pow((nn - 1.), 2))) * (values["a1n"] - 2. * nn / (nn + 1.))) / values["a1"]) - vFs; // Simonsen et al. 1995 double vFs = (((2 * n * n * n + 110 * n * n - 255 * n + 153) / (9 * n * n * (n - 1))) + ((2 * (n - 1) * values["a1"]) / (n * n)) - 8 * values["a2"] / n) / (pow(values["a1"], 2) + values["a2"]); double uFs = (((4 * n * n + 19 * n + 3 - 12 * (n + 1) * values["a1n"]) / (3 * n * (n - 1))) / values["a1"]) - vFs; double eta = static_cast(totNumberMutations(group)); if (eta == 0.) throw ZeroDivisionException("eta should not be null"); double etas = static_cast(countSingleton(group)); // Fu & Li 1993 // Simonsen et al. 1995 return (pi - ((n - 1.) 
/ n * etas)) / sqrt(uFs * eta + vFs * eta * eta); } double SequenceStatistics::FstHudson92(const PolymorphismSequenceContainer& psc, size_t id1, size_t id2) { vector vdiff; double piIntra1, piIntra2, meanPiIntra, piInter, Fst; PolymorphismSequenceContainer* Pop1 = PolymorphismSequenceContainerTools::extractGroup(psc, id1); PolymorphismSequenceContainer* Pop2 = PolymorphismSequenceContainerTools::extractGroup(psc, id2); piIntra1 = SequenceStatistics::tajima83(*Pop1, false); piIntra2 = SequenceStatistics::tajima83(*Pop2, false); meanPiIntra = (piIntra1 + piIntra2) / 2; double n = 0; for (size_t i = 0; i < Pop1->getNumberOfSequences(); i++) { const Sequence& s1 = Pop1->getSequence(i); for (size_t j = 0; j < Pop2->getNumberOfSequences(); j++) { n++; const Sequence& s2 = Pop2->getSequence(j); vdiff.push_back(SiteContainerTools::computeSimilarity(s1, s2, true, "no gap", true)); } } piInter = (VectorTools::sum(vdiff) / n) * static_cast(psc.getNumberOfSites()); Fst = 1.0 - meanPiIntra / piInter; delete Pop1; delete Pop2; return Fst; } // ****************************************************************************** // Linkage disequilibrium statistics // ****************************************************************************** /**********************/ /* Preliminary method */ /**********************/ PolymorphismSequenceContainer* SequenceStatistics::generateLDContainer(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) { SiteSelection ss; // Extract polymorphic site with only two alleles for (size_t i = 0; i < psc.getNumberOfSites(); i++) { if (keepsingleton) { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && !SiteTools::isTriplet(psc.getSite(i))) { ss.push_back(i); } } else { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && !SiteTools::isTriplet(psc.getSite(i)) && !SiteTools::hasSingleton(psc.getSite(i))) { ss.push_back(i); } } } const SiteContainer* sc = 
SiteContainerTools::getSelectedSites(psc, ss); Alphabet* alpha = new DNA(); // Sylvain Gaillard 17/03/2010: What if psc's Alphabet is not DNA PolymorphismSequenceContainer* ldpsc = new PolymorphismSequenceContainer(sc->getNumberOfSequences(), alpha); // Assign 1 to the more frequent and 0 to the less frequent alleles for (size_t i = 0; i < sc->getNumberOfSites(); i++) { const Site& site = sc->getSite(i); Site siteclone(site); bool deletesite = false; map freqs; SymbolListTools::getFrequencies(siteclone, freqs); int first = 0; for (map::iterator it = freqs.begin(); it != freqs.end(); it++) { if (it->second >= 0.5) first = it->first; } for (size_t j = 0; j < sc->getNumberOfSequences(); j++) { if (freqs[site.getValue(j)] >= 0.5 && site.getValue(j) == first) { if (freqs[site.getValue(j)] <= 1 - freqmin) { siteclone.setElement(j, 1); first = site.getValue(j); } else deletesite = true; } else { if (freqs[site.getValue(j)] >= freqmin) siteclone.setElement(j, 0); else deletesite = true; } } if (!deletesite) ldpsc->addSite(siteclone); } delete alpha; return ldpsc; } /*************************************/ /* Pairwise LD and distance measures */ /*************************************/ Vdouble SequenceStatistics::pairwiseDistances1(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { // get Positions with sites of interest SiteSelection ss; for (size_t i = 0; i < psc.getNumberOfSites(); i++) { if (keepsingleton) { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && !SiteTools::isTriplet(psc.getSite(i))) { const Site& site = psc.getSite(i); bool deletesite = false; map freqs; SymbolListTools::getFrequencies(site, freqs); for (int j = 0; j < static_cast(site.getAlphabet()->getSize()); j++) { if (freqs[j] >= 1 - freqmin) deletesite = true; } if (!deletesite) ss.push_back(i); } } else { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && 
!SiteTools::isTriplet(psc.getSite(i)) && !SiteTools::hasSingleton(psc.getSite(i))) { ss.push_back(i); const Site& site = psc.getSite(i); bool deletesite = false; map freqs; SymbolListTools::getFrequencies(site, freqs); for (int j = 0; j < static_cast(site.getAlphabet()->getSize()); j++) { if (freqs[j] >= 1 - freqmin) deletesite = true; } if (!deletesite) ss.push_back(i); } } } // compute pairwise distances if (ss.size() < 2) throw DimensionException("SequenceStatistics::pairwiseDistances1 : less than 2 sites are available", ss.size(), 2); Vdouble dist; for (size_t i = 0; i < ss.size() - 1; i++) { for (size_t j = i + 1; j < ss.size(); j++) { dist.push_back(static_cast(ss[j] - ss[i])); } } return dist; } Vdouble SequenceStatistics::pairwiseDistances2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { SiteSelection ss; for (size_t i = 0; i < psc.getNumberOfSites(); i++) { if (keepsingleton) { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && !SiteTools::isTriplet(psc.getSite(i))) { const Site& site = psc.getSite(i); bool deletesite = false; map freqs; SymbolListTools::getFrequencies(site, freqs); for (int j = 0; j < static_cast(site.getAlphabet()->getSize()); j++) { if (freqs[j] >= 1 - freqmin) deletesite = true; } if (!deletesite) ss.push_back(i); } } else { if (SiteTools::isComplete(psc.getSite(i)) && !SiteTools::isConstant(psc.getSite(i)) && !SiteTools::isTriplet(psc.getSite(i)) && !SiteTools::hasSingleton(psc.getSite(i))) { ss.push_back(i); const Site& site = psc.getSite(i); bool deletesite = false; map freqs; SymbolListTools::getFrequencies(site, freqs); for (int j = 0; j < static_cast(site.getAlphabet()->getSize()); j++) { if (freqs[j] >= 1 - freqmin) deletesite = true; } if (!deletesite) ss.push_back(i); } } } size_t n = ss.size(); if (n < 2) throw DimensionException("SequenceStatistics::pairwiseDistances1 : less than 2 sites are available", ss.size(), 2); Vdouble 
distance(n * (n - 1) / 2, 0); size_t nbsite = psc.getNumberOfSites(); for (size_t k = 0; k < psc.getNumberOfSequences(); k++) { const Sequence& seq = psc.getSequence(k); SiteSelection gap, newss = ss; Vdouble dist; for (size_t i = 0; i < nbsite; i++) { if (seq.getValue(i) == -1) gap.push_back(i); } // Site positions are re-numbered to take gaps into account for (size_t i = 0; i < gap.size(); i++) { for (size_t j = 0; j < ss.size(); j++) { if (ss[j] > gap[i]) newss[j]--; } } for (size_t i = 0; i < n - 1; i++) { for (size_t j = i + 1; j < n; j++) { dist.push_back(static_cast(newss[j] - newss[i])); } } distance += dist; } distance = distance / static_cast(psc.getNumberOfSequences()); return distance; } Vdouble SequenceStatistics::pairwiseD(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); Vdouble D; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); if (nbsite < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sites are available", nbsite, 2); if (nbseq < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sequences are available", nbseq, 2); for (size_t i = 0; i < nbsite - 1; i++) { for (size_t j = i + 1; j < nbsite; j++) { double haplo = 0; const Site& site1 = newpsc->getSite(i); const Site& site2 = newpsc->getSite(j); map freq1; map freq2; SymbolListTools::getFrequencies(site1, freq1); SymbolListTools::getFrequencies(site2, freq2); for (size_t k = 0; k < nbseq; k++) { if (site1.getValue(k) + site2.getValue(k) == 2) haplo++; } haplo = haplo / static_cast(nbseq); D.push_back(std::abs(haplo - freq1[1] * freq2[1])); } } return D; } Vdouble SequenceStatistics::pairwiseDprime(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { PolymorphismSequenceContainer* newpsc = 
SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); Vdouble Dprime; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); if (nbsite < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sites are available", nbsite, 2); if (nbseq < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sequences are available", nbseq, 2); for (size_t i = 0; i < nbsite - 1; i++) { for (size_t j = i + 1; j < nbsite; j++) { double haplo = 0; const Site& site1 = newpsc->getSite(i); const Site& site2 = newpsc->getSite(j); map freq1; map freq2; SymbolListTools::getFrequencies(site1, freq1); SymbolListTools::getFrequencies(site2, freq2); for (size_t k = 0; k < nbseq; k++) { if (site1.getValue(k) + site2.getValue(k) == 2) haplo++; } haplo = haplo / static_cast(nbseq); double d, D = (haplo - freq1[1] * freq2[1]); if (D > 0) { if (freq1[1] * freq2[0] <= freq1[0] * freq2[1]) { d = std::abs(D) / (freq1[1] * freq2[0]); } else { d = std::abs(D) / (freq1[0] * freq2[1]); } } else { if (freq1[1] * freq2[1] <= freq1[0] * freq2[0]) { d = std::abs(D) / (freq1[1] * freq2[1]); } else { d = std::abs(D) / (freq1[0] * freq2[0]); } } Dprime.push_back(d); } } return Dprime; } Vdouble SequenceStatistics::pairwiseR2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin); Vdouble R2; size_t nbsite = newpsc->getNumberOfSites(); size_t nbseq = newpsc->getNumberOfSequences(); if (nbsite < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sites are available", nbsite, 2); if (nbseq < 2) throw DimensionException("SequenceStatistics::pairwiseD: less than two sequences are available", nbseq, 2); for (size_t i = 0; i < nbsite - 1; i++) { for (size_t j = i + 1; j < nbsite; j++) { double haplo = 0; const Site& site1 = newpsc->getSite(i); const 
Site& site2 = newpsc->getSite(j); map freq1; map freq2; SymbolListTools::getFrequencies(site1, freq1); SymbolListTools::getFrequencies(site2, freq2); for (size_t k = 0; k < nbseq; k++) { if (site1.getValue(k) + site2.getValue(k) == 2) haplo++; } haplo = haplo / static_cast(nbseq); double r = ((haplo - freq1[1] * freq2[1]) * (haplo - freq1[1] * freq2[1])) / (freq1[0] * freq1[1] * freq2[0] * freq2[1]); R2.push_back(r); } } return R2; } /***********************************/ /* Global LD and distance measures */ /***********************************/ double SequenceStatistics::meanD(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { Vdouble D = pairwiseD(psc, keepsingleton, freqmin); return VectorTools::mean(D); } double SequenceStatistics::meanDprime(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin); return VectorTools::mean(Dprime); } catch (DimensionException& e) { throw e; } } double SequenceStatistics::meanR2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble R2 = SequenceStatistics::pairwiseR2(psc, keepsingleton, freqmin); return VectorTools::mean(R2); } catch (DimensionException& e) { throw e; } } double SequenceStatistics::meanDistance1(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble dist = pairwiseDistances1(psc, keepsingleton, freqmin); return VectorTools::mean(dist); } catch (DimensionException& e) { throw e; } } double SequenceStatistics::meanDistance2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble dist = pairwiseDistances2(psc, keepsingleton, freqmin); return VectorTools::mean(dist); } catch (DimensionException& e) { throw e; } } /**********************/ /* Regression 
methods */ /**********************/ double SequenceStatistics::originRegressionD(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble D = pairwiseD(psc, keepsingleton, freqmin) - 1; Vdouble dist; if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; return VectorTools::sum(D * dist) / VectorTools::sum(dist * dist); } catch (DimensionException& e) { throw e; } } double SequenceStatistics::originRegressionDprime(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin) - 1; Vdouble dist; if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; return VectorTools::sum(Dprime * dist) / VectorTools::sum(dist * dist); } catch (DimensionException& e) { throw e; } } double SequenceStatistics::originRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin) - 1; Vdouble dist; if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; return VectorTools::sum(R2 * dist) / VectorTools::sum(dist * dist); } catch (DimensionException& e) { throw e; } } Vdouble SequenceStatistics::linearRegressionD(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble D = pairwiseD(psc, keepsingleton, freqmin); Vdouble dist; Vdouble reg(2); if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; reg[0] = 
VectorTools::cov(dist, D) / VectorTools::var(dist); reg[1] = VectorTools::mean(D) - reg[0] * VectorTools::mean(dist); return reg; } catch (DimensionException& e) { throw e; } } Vdouble SequenceStatistics::linearRegressionDprime(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin); Vdouble dist; Vdouble reg(2); if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; reg[0] = VectorTools::cov(dist, Dprime) / VectorTools::var(dist); reg[1] = VectorTools::mean(Dprime) - reg[0] * VectorTools::mean(dist); return reg; } catch (DimensionException& e) { throw e; } } Vdouble SequenceStatistics::linearRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin); Vdouble dist; Vdouble reg(2); if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; reg[0] = VectorTools::cov(dist, R2) / VectorTools::var(dist); reg[1] = VectorTools::mean(R2) - reg[0] * VectorTools::mean(dist); return reg; } catch (DimensionException& e) { throw e; } } double SequenceStatistics::inverseRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException) { try { Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin); Vdouble unit(R2.size(), 1); Vdouble R2transformed = unit / R2 - 1; Vdouble dist; if (distance1) dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000; else dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000; return VectorTools::sum(R2transformed * dist) / VectorTools::sum(dist * dist); } catch (DimensionException& e) { throw e; } } /**********************/ /* 
Hudson method */ /**********************/ double SequenceStatistics::hudson87(const PolymorphismSequenceContainer& psc, double precision, double cinf, double csup) { double left = leftHandHudson_(psc); size_t n = psc.getNumberOfSequences(); double dif = 1; double c1 = cinf; double c2 = csup; if (SequenceStatistics::polymorphicSiteNumber(psc) < 2) return -1; if (rightHandHudson_(c1, n) < left) return cinf; if (rightHandHudson_(c2, n) > left) return csup; while (dif > precision) { if (rightHandHudson_((c1 + c2) / 2, n) > left) c1 = (c1 + c2) / 2; else c2 = (c1 + c2) / 2; dif = std::abs(2 * (c1 - c2) / (c1 + c2)); } return (c1 + c2) / 2; } /*****************/ /* Tests methods */ /*****************/ void SequenceStatistics::testUsefullValues(std::ostream& s, size_t n) { map v = getUsefullValues_(n); double vD = getVD_(n, v["a1"], v["a2"], v["cn"]); double uD = getUD_(v["a1"], vD); double vDs = getVDstar_(n, v["a1"], v["a2"], v["dn"]); double uDs = getUDstar_(n, v["a1"], vDs); s << n << "\t"; s << v["a1"] << "\t"; s << v["a2"] << "\t"; s << v["a1n"] << "\t"; s << v["b1"] << "\t"; s << v["b2"] << "\t"; s << v["c1"] << "\t"; s << v["c2"] << "\t"; s << v["cn"] << "\t"; s << v["dn"] << "\t"; s << v["e1"] << "\t"; s << v["e2"] << "\t"; s << uD << "\t"; s << vD << "\t"; s << uDs << "\t"; s << vDs << endl; } // ****************************************************************************** // Private methods // ****************************************************************************** size_t SequenceStatistics::getMutationNumber_(const Site& site) { size_t tmp_count = 0; map states_count; SymbolListTools::getCounts(site, states_count); for (map::iterator it = states_count.begin(); it != states_count.end(); it++) { if (it->first >= 0) tmp_count++; } if (tmp_count > 0) tmp_count--; return tmp_count; } size_t SequenceStatistics::getSingletonNumber_(const Site& site) { size_t nus = 0; map states_count; SymbolListTools::getCounts(site, states_count); for (map::iterator it = 
states_count.begin(); it != states_count.end(); it++) { if (it->second == 1) nus++; } return nus; } size_t SequenceStatistics::getDerivedSingletonNumber_(const Site& site_in, const Site& site_out) { size_t nus = 0; map states_count; map outgroup_states_count; SymbolListTools::getCounts(site_in, states_count); SymbolListTools::getCounts(site_out, outgroup_states_count); // if there is more than one variant in the outgroup we will not be able to recover the ancestral state if (outgroup_states_count.size() == 1) { for (map::iterator it = states_count.begin(); it != states_count.end(); it++) { if (it->second == 1) { if (outgroup_states_count.find(it->first) == outgroup_states_count.end()) nus++; } } } return nus; } std::map SequenceStatistics::getUsefullValues_(size_t n) { double nn = static_cast(n); map values; values["a1"] = 0.; values["a2"] = 0.; values["a1n"] = 0.; values["b1"] = 0.; values["b2"] = 0.; values["c1"] = 0.; values["c2"] = 0.; values["cn"] = 0.; values["dn"] = 0.; values["e1"] = 0.; values["e2"] = 0.; if (n > 1) { for (double i = 1; i < nn; i++) { values["a1"] += 1. / i; values["a2"] += 1. / (i * i); } values["a1n"] = values["a1"] + (1. / nn); values["b1"] = (nn + 1.) / (3. * (nn - 1.)); values["b2"] = 2. * ((nn * nn) + nn + 3.) / (9. * nn * (nn - 1.)); values["c1"] = values["b1"] - (1. / values["a1"]); values["c2"] = values["b2"] - ((nn + 2.) / (values["a1"] * nn)) + (values["a2"] / (values["a1"] * values["a1"])); if (n == 2) { values["cn"] = 1.; values["dn"] = 2.; } else { values["cn"] = 2. * ((nn * values["a1"]) - (2. * (nn - 1.))) / ((nn - 1.) * (nn - 2.)); values["dn"] = values["cn"] + ((nn - 2.) / ((nn - 1.) * (nn - 1.))) + (2. / (nn - 1.)) * ((3. / 2.) - (((2. * values["a1n"]) - 3.) / (nn - 2.)) - (1. 
/ nn)); } values["e1"] = values["c1"] / values["a1"]; values["e2"] = values["c2"] / ((values["a1"] * values["a1"]) + values["a2"]); } return values; } double SequenceStatistics::getVD_(size_t n, double a1, double a2, double cn) { double nn = static_cast(n); if (n < 3) return 0.; double vD = 1. + ((a1 * a1) / (a2 + (a1 * a1))) * (cn - ((nn + 1.) / (nn - 1.))); return vD; } double SequenceStatistics::getUD_(double a1, double vD) { return a1 - 1. - vD; } double SequenceStatistics::getVDstar_(size_t n, double a1, double a2, double dn) { double denom = (a1 * a1) + a2; if (n < 3 || denom == 0.) return 0.; double nn = static_cast(n); double nnn = nn / (nn - 1.); // Fu & Li 1993 double vDs = ( (nnn * nnn * a2) + (a1 * a1 * dn) - (2. * (nn * a1 * (a1 + 1)) / ((nn - 1.) * (nn - 1.))) ) / denom; // Simonsen et al. 1995 /* double vDs = ( (values["a2"] / pow(values["a1"], 2)) - (2./nn) * (1. + 1./values["a1"] - values["a1"] + values["a1"]/nn) - 1./(nn*nn) ) / (pow(values["a1"], 2) + values["a2"]); */ return vDs; } double SequenceStatistics::getUDstar_(size_t n, double a1, double vDs) { if (n < 3) return 0.; double nn = static_cast(n); double nnn = nn / (nn - 1.); // Fu & Li 1993 double uDs = (nnn * (a1 - nnn)) - vDs; // Simonsen et al. 
1995 /* double uDs = (((nn - 1.)/nn - 1./values["a1"]) / values["a1"]) - vDs; */ return uDs; } double SequenceStatistics::leftHandHudson_(const PolymorphismSequenceContainer& psc) { PolymorphismSequenceContainer* newpsc = PolymorphismSequenceContainerTools::getCompleteSites(psc); size_t nbseq = newpsc->getNumberOfSequences(); double S1 = 0; double S2 = 0; for (size_t i = 0; i < nbseq - 1; i++) { for (size_t j = i + 1; j < nbseq; j++) { SequenceSelection ss(2); ss[0] = i; ss[1] = j; PolymorphismSequenceContainer* psc2 = PolymorphismSequenceContainerTools::getSelectedSequences(*newpsc, ss); S1 += SequenceStatistics::watterson75(*psc2, true); S2 += SequenceStatistics::watterson75(*psc2, true) * SequenceStatistics::watterson75(*psc2, true); delete psc2; } } double Sk = (2 * S2 - pow(2 * S1 / static_cast(nbseq), 2.)) / pow(nbseq, 2.); double H = SequenceStatistics::heterozygosity(*newpsc); double H2 = SequenceStatistics::squaredHeterozygosity(*newpsc); delete newpsc; return static_cast(Sk - H + H2) / pow(H * static_cast(nbseq) / static_cast(nbseq - 1), 2.); } double SequenceStatistics::rightHandHudson_(double c, size_t n) { double nn = static_cast(n); return 1. / (97. * pow(c, 2.) * pow(nn, 3.)) * ((nn - 1.) * (97. * (c * (4. + (c - 2. * nn) * nn) + (-2. * (7. + c) + 4. * nn + (c - 1.) * pow(nn, 2.)) * log((18. + c * (13. + c)) / 18.)) + sqrt(97.) * (110. + nn * (49. * nn - 52.) + c * (2. + nn * (15. * nn - 8.))) * log(-1. + (72. + 26. * c) / (36. + 13. * c - c * sqrt(97.))))); } bpp-popgen-2.1.0/src/Bpp/PopGen/Genetix.cpp000644 000000 000000 00000010735 12147656633 020371 0ustar00rootroot000000 000000 // // File Genetix.cpp // Authors : Sylvain Gaillard // Khalid Belkhir // Last modification : Monday August 02 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Genetix.h" using namespace bpp; using namespace std; Genetix::Genetix() {} Genetix::~Genetix() {} void Genetix::read(istream& is, DataSet& data_set) throw (Exception) { if (!is) throw IOException("Genetix::read: fail to open stream."); // Loci number string temp = FileTools::getNextLine(is); unsigned int loc_nbr; stringstream(temp) >> loc_nbr; data_set.initAnalyzedLoci(loc_nbr); // Groups number temp = FileTools::getNextLine(is); unsigned int grp_nbr; stringstream(temp) >> grp_nbr; // Loci data for (unsigned int i = 0; i < loc_nbr; i++) { // Locus name string name = FileTools::getNextLine(is); name = TextTools::removeSurroundingWhiteSpaces(name); LocusInfo tmp_loc(name); // Alleles stringstream values(FileTools::getNextLine(is)); unsigned int nbr_al; values >> nbr_al; for (unsigned int j = 0; j < nbr_al; j++) { string al_id; values >> al_id; BasicAlleleInfo tmp_al(al_id); tmp_loc.addAlleleInfo(tmp_al); } data_set.setLocusInfo(i, tmp_loc); } // Groups for (unsigned int i = 0; i < grp_nbr; i++) { data_set.addEmptyGroup(i); // Group name ... 
Now used khalid temp = FileTools::getNextLine(is); data_set.setGroupName(i, temp); // Number of individuals unsigned int ind_nbr; temp = FileTools::getNextLine(is); stringstream tmp(temp); tmp >> ind_nbr; for (unsigned int j = 0; j < ind_nbr; j++) { temp = FileTools::getNextLine(is); string ind_name(temp.begin(), temp.begin() + 11); temp = string(temp.begin() + 11, temp.end()); data_set.addEmptyIndividualToGroup(i, TextTools::removeSurroundingWhiteSpaces(ind_name) + string("_") + TextTools::toString(i + 1) + string("_") + TextTools::toString(j + 1)); data_set.initIndividualGenotypeInGroup(i, j); StringTokenizer alleles(temp, string(" ")); // cout << alleles.numberOfRemainingTokens() << endl; for (unsigned int k = 0; k < loc_nbr; k++) { string tmp_string = alleles.nextToken(); vector tmp_alleles; tmp_alleles.push_back(string(tmp_string.begin(), tmp_string.begin() + 3)); tmp_alleles.push_back(string(tmp_string.begin() + 3, tmp_string.begin() + 6)); if (tmp_alleles[0] != string("000") && tmp_alleles[1] != string("000")) data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(i, j, k, tmp_alleles); } } } } void Genetix::read(const string& path, DataSet& data_set) throw (Exception) { AbstractIDataSet::read(path, data_set); } DataSet* Genetix::read(istream& is) throw (Exception) { return AbstractIDataSet::read(is); } DataSet* Genetix::read(const string& path) throw (Exception) { return AbstractIDataSet::read(path); } bpp-popgen-2.1.0/src/Bpp/PopGen/MonolocusGenotypeTools.h000644 000000 000000 00000005641 12147656633 023145 0ustar00rootroot000000 000000 // // File MonolocusGenotypeTools.h // Author : Sylvain Gaillard // Last modification : April 4, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (April 4, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

// Secured inclusion of header's file
#ifndef _MonolocusGenotypeTools_h_
#define _MonolocusGenotypeTools_h_

// From STL
#include
#include
#include

// From Pop
#include "MonolocusGenotype.h"

namespace bpp
{
/**
 * @brief The MonolocusGenotypeTools static class.
 *
 * This class provides tools for MonolocusGenotype manipulation or creation.
 *
 * @author Sylvain Gaillard
 */
class MonolocusGenotypeTools
{
public:
  /**
   * @brief Build a proper MonolocusGenotype according to the number of alleles.
   *
   * Return a MonolocusGenotype built according to the number of alleles.
   * If one allele key, send a MonoAlleleMonolocusGenotype,
   * if two allele keys, send a BiAlleleMonolocusGenotype,
   * if more allele keys, send a MultiAlleleMonolocusGenotype.
   *
   * @param allele_keys A vector containing the allele keys to put in the MonolocusGenotype.
   * @return A MonolocusGenotype according to the number of alleles
   */
  static std::auto_ptr buildMonolocusGenotypeByAlleleKey(const std::vector allele_keys) throw (Exception);
};
} // end of namespace bpp;

#endif // _MonolocusGenotypeTools_h_
bpp-popgen-2.1.0/src/Bpp/PopGen/AbstractODataSet.h000644 000000 000000 00000004532 12147656633 021561 0ustar00rootroot000000 000000 // // File AbstractODataSet.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

#ifndef _ABSTRACTODATASET_H_
#define _ABSTRACTODATASET_H_

#include "ODataSet.h"

namespace bpp
{
/**
 * @brief Partial implementation of the DataSet Output interface.
 *
 * The stream overload of write() is left pure virtual for concrete formats;
 * only the path overload is declared as implemented by this class.
 *
 * @author Sylvain Gaillard
 */
class AbstractODataSet :
  public ODataSet
{
public:
  virtual ~AbstractODataSet();

public:
  /**
   * @name The ODataSet interface.
   * @{
   */

  /**
   * @brief Write a DataSet to an output stream (to be provided by each concrete format).
   *
   * @param os       The stream to write to.
   * @param data_set The DataSet to write.
   */
  virtual void write(std::ostream& os, const DataSet& data_set) const throw (Exception) = 0;

  /**
   * @brief Write a DataSet to the file designated by a path.
   *
   * NOTE(review): the definition is not in this header; presumably it opens
   * the file and delegates to the stream overload, with `overwrite` choosing
   * between replacing and appending -- confirm against the implementation.
   *
   * @param path      The path of the file to write.
   * @param data_set  The DataSet to write.
   * @param overwrite See the note above.
   */
  virtual void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception);
  /**
   * @}
   */
};
} // end of namespace bpp;

#endif // _ABSTRACTODATASET_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/PopgenlibIO.cpp000644 000000 000000 00000053145 12147656633 021137 0ustar00rootroot000000 000000 // // File PopgenlibIO.cpp // Created by: Sylvain Gaillard // Created on: Thursday July 29 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "PopgenlibIO.h" using namespace bpp; using namespace std; const string PopgenlibIO::WHITESPACE = string("WHITESPACE"); const string PopgenlibIO::TAB = string("TAB"); const string PopgenlibIO::COMA = string("COMA"); const string PopgenlibIO::SEMICOLON = string("SEMICOLON"); const string PopgenlibIO::DIPLOID = string("DIPLOID"); const string PopgenlibIO::HAPLOID = string("HAPLOID"); const string PopgenlibIO::HAPLODIPLOID = string("HAPLODIPLOID"); const string PopgenlibIO::UNKNOWN = string("UNKNOWN"); PopgenlibIO::PopgenlibIO() : data_separator_(' '), missing_data_symbol_('$') {} PopgenlibIO::PopgenlibIO(const std::string& missing_data_symbol, const std::string& data_separator) throw (Exception) : data_separator_(' '), missing_data_symbol_('$') { try { setDataSeparator(data_separator); setMissingDataSymbol(missing_data_symbol); } catch (Exception& e) { throw e; } } PopgenlibIO::~PopgenlibIO() {} void PopgenlibIO::setMissingDataSymbol(const std::string& missing_data_symbol) throw (Exception) { if (missing_data_symbol.size() != 1 || isdigit(missing_data_symbol[0]) || TextTools::isWhiteSpaceCharacter(missing_data_symbol[0]) || missing_data_symbol[0] == data_separator_ ) throw 
Exception("PopgenlibIO::setMissingData: not expected value for missing_data_symbol."); missing_data_symbol_ = missing_data_symbol[0]; } void PopgenlibIO::setDataSeparator(const std::string& data_separator) throw (Exception) { if (data_separator == WHITESPACE) data_separator_ = ' '; else if (data_separator == TAB) data_separator_ = '\t'; else if (data_separator == COMA) data_separator_ = ','; else if (data_separator == SEMICOLON) data_separator_ = ';'; else { if (isdigit(data_separator[0]) || data_separator == getMissingDataSymbol() ) throw Exception("PopgenlibIO::setDataSeparator: not expected value for data_separator."); data_separator_ = data_separator.c_str()[0]; } } std::string PopgenlibIO::getMissingDataSymbol() const { return TextTools::toString(missing_data_symbol_); } std::string PopgenlibIO::getDataSeparator() const { switch (data_separator_) { case (' '): return WHITESPACE; case ('\t'): return TAB; case (','): return COMA; case (';'): return SEMICOLON; default: return TextTools::toString(data_separator_); } } char PopgenlibIO::getMissingDataChar() const { return missing_data_symbol_; } char PopgenlibIO::getDataSeparatorChar() const { return data_separator_; } void PopgenlibIO::read(std::istream& is, DataSet& data_set) throw (Exception) { if (!is) throw IOException("PopgenlibIO::read: fail to open stream."); string temp = ""; vector temp_v; stringstream tmp_ss; VectorSequenceContainer* tmp_vsc = NULL; Locality tmp_locality("tmp"); vector tmp_locinf; Individual tmp_indiv; bool section1 = true; bool section2 = true; bool section3 = true; bool section4 = true; bool section5 = true; size_t current_section = 0; size_t previous_section = 0; size_t linenum = 0; // Main loop for all file lines while (!is.eof()) { temp = FileTools::getNextLine(is); linenum++; // Get the correct current section if (temp.find("[General]", 0) != string::npos) { previous_section = current_section; current_section = 1; continue; } else if (temp.find("[Localities]", 0) != string::npos) { 
previous_section = current_section; current_section = 2; continue; } else if (temp.find("[Sequences]", 0) != string::npos) { previous_section = current_section; current_section = 3; continue; } else if (temp.find("[Loci]", 0) != string::npos) { previous_section = current_section; current_section = 4; continue; } else if (temp.find("[Individuals]", 0) != string::npos) { previous_section = current_section; current_section = 5; continue; } // General section ------------------------------------ if (current_section == 1 && previous_section < 1) { temp_v.push_back(temp); } if (section1 && current_section != 1 && previous_section == 1) { section1 = false; parseGeneral_(temp_v, data_set); temp_v.clear(); if (data_set.hasSequenceData() && tmp_vsc == NULL) tmp_vsc = new VectorSequenceContainer(data_set.getAlphabet()); } // Localities section --------------------------------- if (current_section == 2 && previous_section < 2) { if (temp.find(">", 0) != string::npos) { parseLocality_(temp_v, data_set); temp_v.clear(); temp_v.push_back(temp); } else temp_v.push_back(temp); } if (section2 && current_section != 2 && previous_section == 2) { section2 = false; parseLocality_(temp_v, data_set); temp_v.clear(); } // Sequences section ---------------------------------- if (current_section == 3 && previous_section < 3) { if (temp.find(">", 0) != string::npos) { parseSequence_(temp_v, *tmp_vsc); temp_v.clear(); temp_v.push_back(temp); } else temp_v.push_back(temp); } if (section3 && current_section != 3 && previous_section == 3) { section3 = false; parseSequence_(temp_v, *tmp_vsc); temp_v.clear(); } // Loci section --------------------------------------- if (current_section == 4 && previous_section < 4) { if (temp.find(">", 0) != string::npos) { parseLoci_(temp_v, tmp_locinf); temp_v.clear(); temp_v.push_back(temp); } else temp_v.push_back(temp); } if (section4 && current_section != 4 && previous_section == 4) { section4 = false; parseLoci_(temp_v, tmp_locinf); temp_v.clear(); 
AnalyzedLoci tmp_anloc(tmp_locinf.size()); for (size_t i = 0; i < tmp_locinf.size(); i++) { tmp_anloc.setLocusInfo(i, tmp_locinf[i]); } data_set.setAnalyzedLoci(tmp_anloc); } // Individuals section -------------------------------- if (current_section == 5 && previous_section < 5) { if (temp.find(">", 0) != string::npos) { parseIndividual_(temp_v, data_set, *tmp_vsc); temp_v.clear(); temp_v.push_back(temp); } else temp_v.push_back(temp); } if (section5 && current_section != 5 && previous_section == 5) { section5 = false; parseIndividual_(temp_v, data_set, *tmp_vsc); temp_v.clear(); } } // Emptied the buffer if eof. if (section2 && current_section == 2) parseLocality_(temp_v, data_set); if (section3 && current_section == 3) parseSequence_(temp_v, *tmp_vsc); if (section5 && current_section == 5) parseIndividual_(temp_v, data_set, *tmp_vsc); temp_v.clear(); } void PopgenlibIO::parseGeneral_(const std::vector& in, DataSet& data_set) { stringstream is; for (size_t i = 0; i < in.size(); i++) { is << in[i] << endl; } string temp; while (!is.eof() && in.size() != 0) { temp = FileTools::getNextLine(is); if (temp.find("MissingData", 0) != string::npos) setMissingDataSymbol(getValues_(temp, "=")[0]); if (temp.find("DataSeparator", 0) != string::npos) setDataSeparator(getValues_(temp, "=")[0]); if (temp.find("SequenceType", 0) != string::npos) data_set.setAlphabet(getValues_(temp, "=")[0]); } } void PopgenlibIO::parseLocality_(const std::vector& in, DataSet& data_set) { stringstream is; for (size_t i = 0; i < in.size(); i++) { is << in[i] << endl; } Locality tmp_locality(""); string temp; while (!is.eof() && in.size() != 0) { temp = FileTools::getNextLine(is); // cout << "_parseLocality: " << temp << endl; if (temp.find(">", 0) != string::npos) { tmp_locality.setName(TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end()))); } if (temp.find("Coord", 0) != string::npos) { vector v = getValues_(temp, "="); tmp_locality.setX(TextTools::toDouble(v[0])); 
tmp_locality.setY(TextTools::toDouble(v[1])); } } if (tmp_locality.getName() != "") data_set.addLocality(tmp_locality); } void PopgenlibIO::parseSequence_(const std::vector& in, VectorSequenceContainer& vsc) { Fasta ifasta; stringstream is; for (size_t i = 0; i < in.size(); i++) { is << in[i] << endl; } ifasta.readSequences(is, vsc); } void PopgenlibIO::parseLoci_(const std::vector& in, std::vector& locus_info) { stringstream is; for (size_t i = 0; i < in.size(); i++) { is << in[i] << endl; } string locinf_name = ""; unsigned int locinf_ploidy = LocusInfo::DIPLOID; string temp; while (!is.eof()) { temp = FileTools::getNextLine(is); if (temp.find(">", 0) != string::npos) { locinf_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end())); } if (temp.find("Ploidy", 0) != string::npos) { vector v = getValues_(temp, "="); string tmp_str_ploidy = TextTools::removeSurroundingWhiteSpaces(v[0]); tmp_str_ploidy = TextTools::toUpper(tmp_str_ploidy); // cout << "ploidy : " << tmp_str_ploidy << endl; if (tmp_str_ploidy == DIPLOID) locinf_ploidy = LocusInfo::DIPLOID; else if (tmp_str_ploidy == HAPLOID) locinf_ploidy = LocusInfo::HAPLOID; else if (tmp_str_ploidy == HAPLODIPLOID) locinf_ploidy = LocusInfo::HAPLODIPLOID; else if (tmp_str_ploidy == UNKNOWN) locinf_ploidy = LocusInfo::UNKNOWN; } if (temp.find("NbAlleles", 0) != string::npos) { // not used ... 
} } if (locinf_name != "") locus_info.push_back(LocusInfo(locinf_name, locinf_ploidy)); } void PopgenlibIO::parseIndividual_(const std::vector& in, DataSet& data_set, const VectorSequenceContainer& vsc) { Individual tmp_indiv; size_t tmp_group_pos = 0; string temp = ""; for (size_t i = 0; i < in.size(); i++) { // Get Individual Id if (in[i].find(">", 0) != string::npos) { tmp_indiv.setId(TextTools::removeSurroundingWhiteSpaces(string(in[i].begin() + 1, in[i].end()))); } // Get the Group if (in[i].find("Group", 0) != string::npos) { temp = in[i]; tmp_group_pos = TextTools::toInt(getValues_(temp, "=")[0]); try { data_set.addEmptyGroup(tmp_group_pos); } catch (...) {} } // Find the locality if (in[i].find("Locality", 0) != string::npos) { temp = in[i]; size_t sep_pos = temp.find("=", 0); string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + sep_pos + 1, temp.end())); try { tmp_indiv.setLocality(&data_set.getLocalityByName(loc_name)); } catch (...) {} } // Set the coord if (in[i].find("Coord", 0) != string::npos) { temp = in[i]; tmp_indiv.setCoord(TextTools::toDouble(getValues_(temp, "=")[0]), TextTools::toDouble(getValues_(temp, "=")[1])); } // And the date if (in[i].find("Date", 0) != string::npos) { int d, m, y; temp = in[i]; string tmp_date = getValues_(temp, "=")[0]; d = TextTools::toInt(string(tmp_date.begin(), tmp_date.begin() + 2)); m = TextTools::toInt(string(tmp_date.begin() + 2, tmp_date.begin() + 4)); y = TextTools::toInt(string(tmp_date.begin() + 4, tmp_date.end())); tmp_indiv.setDate(Date(d, m, y)); } // Now the sequences if (in[i].find("SequenceData", 0) != string::npos) { i++; temp = in[i]; vector seq_pos_str = getValues_(temp, ""); for (size_t j = 0; j < seq_pos_str.size(); j++) { try { if (seq_pos_str[j] != getMissingDataSymbol()) tmp_indiv.addSequence(j, vsc.getSequence(TextTools::toInt(seq_pos_str[j]) - 1)); } catch (...) 
{} } } // Finally the loci if (in[i].find("AllelicData", 0) != string::npos) { string temp1 = in[++i]; string temp2 = in[++i]; vector allele_pos_str1 = getValues_(temp1, ""); vector allele_pos_str2 = getValues_(temp2, ""); try { tmp_indiv.initGenotype(data_set.getNumberOfLoci()); } catch (...) {} if (allele_pos_str1.size() == allele_pos_str2.size()) { for (size_t j = 0; j < allele_pos_str1.size(); j++) { const LocusInfo& locus_info = data_set.getLocusInfoAtPosition(j); allele_pos_str1[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str1[j]); vector tmp_alleles_id; if (allele_pos_str1[j] != getMissingDataSymbol()) { BasicAlleleInfo tmp_allele_info(allele_pos_str1[j]); try { data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info); } catch (...) {} tmp_alleles_id.push_back(allele_pos_str1[j]); } allele_pos_str2[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str2[j]); if (allele_pos_str2[j] != getMissingDataSymbol()) { BasicAlleleInfo tmp_allele_info(allele_pos_str2[j]); try { data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info); } catch (...) {} tmp_alleles_id.push_back(allele_pos_str2[j]); } try { tmp_indiv.setMonolocusGenotypeByAlleleId(j, tmp_alleles_id, locus_info); } catch (...) {} } } } } if (tmp_indiv.getId() != "") { try { data_set.addIndividualToGroup(data_set.getGroupPosition(tmp_group_pos), tmp_indiv); } catch (...) 
{} } } void PopgenlibIO::read(const std::string& path, DataSet& data_set) throw (Exception) { AbstractIDataSet::read(path, data_set); } DataSet* PopgenlibIO::read(std::istream& is) throw (Exception) { return AbstractIDataSet::read(is); } DataSet* PopgenlibIO::read(const std::string& path) throw (Exception) { return AbstractIDataSet::read(path); } void PopgenlibIO::write(std::ostream& os, const DataSet& data_set) const throw (Exception) { size_t seqcpt = 1; // General section -------------------------------------- os << "[General]" << endl; os << "MissingData = " << getMissingDataSymbol() << endl; os << "DataSeparator = " << getDataSeparator() << endl; if (data_set.hasSequenceData()) { string seq_type = data_set.getAlphabetType(); os << "SequenceType = " << seq_type << endl; } // Localities section ----------------------------------- if (data_set.hasLocality()) { os << endl << "[Localities]" << endl; for (size_t i = 0; i < data_set.getNumberOfLocalities(); i++) { os << ">" << (data_set.getLocalityAtPosition(i)).getName() << endl; os << "Coord = " << (data_set.getLocalityAtPosition(i)).getX(); os << " " << (data_set.getLocalityAtPosition(i)).getY() << endl; } } // Sequences section ------------------------------------ if (data_set.hasSequenceData()) { Fasta fasta(80); os << endl << "[Sequences]" << endl; for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++) { fasta.writeSequences(os, data_set.getIndividualAtPositionFromGroup(i, j)->getSequences()); } } } // AllelicData section ---------------------------------- if (data_set.hasAlleleicData()) { os << endl << "[Loci]" << endl; for (size_t i = 0; i < data_set.getNumberOfLoci(); i++) { const LocusInfo& tmp_locus_info = data_set.getLocusInfoAtPosition(i); os << ">" << tmp_locus_info.getName() << endl; os << "Ploidy = "; if (tmp_locus_info.getPloidy() == LocusInfo::HAPLOID) os << HAPLOID; else if (tmp_locus_info.getPloidy() == LocusInfo::DIPLOID) 
os << DIPLOID; else if (tmp_locus_info.getPloidy() == LocusInfo::HAPLODIPLOID) os << HAPLODIPLOID; else if (tmp_locus_info.getPloidy() == LocusInfo::UNKNOWN) os << UNKNOWN; os << endl; os << "NbAlleles = " << tmp_locus_info.getNumberOfAlleles() << endl; } } // Individuals section ---------------------------------- os << endl << "[Individuals]" << endl; for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++) { if (i > 0 || j > 0) os << endl; const Individual* tmp_ind = data_set.getIndividualAtPositionFromGroup(i, j); os << ">" << tmp_ind->getId() << endl; os << "Group = " << TextTools::toString((data_set.getGroupAtPosition(i)).getGroupId()) << endl; if (tmp_ind->hasLocality()) os << "Locality = " << tmp_ind->getLocality()->getName() << endl; if (tmp_ind->hasCoord()) os << "Coord = " << tmp_ind->getX() << " " << tmp_ind->getY() << endl; if (tmp_ind->hasDate()) os << "Date = " << tmp_ind->getDate().getDateStr() << endl; if (tmp_ind->hasSequences()) { size_t nbss = tmp_ind->getNumberOfSequences(); os << "SequenceData = {" << endl; for (size_t k = 0; k < nbss; k++) { try { tmp_ind->getSequenceAtPosition(k); os << TextTools::toString(seqcpt++); } catch (SequenceNotFoundException) { os << getMissingDataChar(); } if (k < nbss - 1) os << getDataSeparatorChar(); else os << endl; } os << "}" << endl; } if (tmp_ind->hasGenotype()) { const MultilocusGenotype& tmp_genotype = tmp_ind->getGenotype(); vector > output(tmp_genotype.size()); os << "AllelicData = {" << endl; for (size_t k = 0; k < tmp_genotype.size(); k++) { output[k].resize(2); if (tmp_genotype.isMonolocusGenotypeMissing(k)) { output[k][0] = getMissingDataChar(); output[k][1] = getMissingDataChar(); } else { vector tmp_all_ind = tmp_genotype.getMonolocusGenotype(k).getAlleleIndex(); output[k][0] = data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[0]).getId(); if (tmp_all_ind.size() > 1) output[k][1] = 
data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[1]).getId(); else output[k][1] = getMissingDataChar(); } } for (size_t k = 0; k < output.size(); k++) { os << output[k][0]; if (k < output.size() - 1) os << getDataSeparatorChar(); else os << endl; } for (size_t k = 0; k < output.size(); k++) { os << output[k][1]; if (k < output.size() - 1) os << getDataSeparatorChar(); else os << endl; } os << "}" << endl; } } } } void PopgenlibIO::write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception) { AbstractODataSet::write(path, data_set, overwrite); } std::vector PopgenlibIO::getValues_(std::string& param_line, const std::string& delim) { vector values; size_t limit = param_line.find(delim, 0); if (limit >= 0) param_line = string(param_line.begin() + limit + delim.size(), param_line.end()); param_line = TextTools::removeSurroundingWhiteSpaces(param_line); size_t bi = 0; size_t bs = param_line.find(getDataSeparatorChar(), bi); while (bs > 0) { values.push_back(string(param_line.begin() + bi, param_line.begin() + bs)); bi = bs + 1; bs = param_line.find(getDataSeparatorChar(), bi); } values.push_back(string(param_line.begin() + bi, param_line.end())); return values; } bpp-popgen-2.1.0/src/Bpp/PopGen/AnalyzedLoci.cpp000644 000000 000000 00000016472 12147656633 021350 0ustar00rootroot000000 000000 // // File AnalyzedLoci.cpp // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AnalyzedLoci.h" using namespace bpp; using namespace std; /******************************************************************************/ AnalyzedLoci::AnalyzedLoci(size_t number_of_loci) : loci_(vector(number_of_loci)) { for (size_t i = 0; i < loci_.size(); i++) { loci_[i] = 0; } } /******************************************************************************/ AnalyzedLoci::AnalyzedLoci(const AnalyzedLoci& analyzed_loci) : loci_(vector(analyzed_loci.loci_.size())) { for (size_t i = 0; i < analyzed_loci.getNumberOfLoci(); i++) { loci_[i] = new LocusInfo(analyzed_loci.getLocusInfoAtPosition(i)); } } /******************************************************************************/ AnalyzedLoci::~AnalyzedLoci() { for (size_t i = 0; i < loci_.size(); i++) { delete loci_[i]; } } /******************************************************************************/ void AnalyzedLoci::setLocusInfo( size_t locus_position, const LocusInfo& locus) throw (IndexOutOfBoundsException) { if (locus_position >= 0 && locus_position < loci_.size()) loci_[locus_position] = new LocusInfo(locus); else throw IndexOutOfBoundsException("AnalyzedLoci::setLocusInfo: locus_position out of bounds", locus_position, 0, loci_.size()); } /******************************************************************************/ size_t AnalyzedLoci::getLocusInfoPosition( const std::string& locus_name) const throw (BadIdentifierException) { for (size_t i = 0; i < loci_.size(); i++) { if (loci_[i] != NULL && loci_[i]->getName() == locus_name) return i; } throw BadIdentifierException("AnalyzedLoci::getLocusInfoPosition: locus not found.", locus_name); } /******************************************************************************/ const LocusInfo& AnalyzedLoci::getLocusInfoByName( const std::string& locus_name) const throw (BadIdentifierException) { for (size_t i = 0; i < loci_.size(); i++) { if (loci_[i] != NULL && loci_[i]->getName() == locus_name) return *(loci_[i]); } throw 
BadIdentifierException("AnalyzedLoci::getLocusInfo: locus not found.", locus_name); } /******************************************************************************/ const LocusInfo& AnalyzedLoci::getLocusInfoAtPosition( size_t locus_position) const throw (Exception) { if (locus_position >= loci_.size()) throw IndexOutOfBoundsException("AnalyzedLoci::getLocusInfoAtPosition: locus_position out of bounds.", locus_position, 0, loci_.size()); if (loci_[locus_position] != NULL) return *(loci_[locus_position]); else throw NullPointerException("AnalyzedLoci::getLocusInfo: no locus defined here."); } /******************************************************************************/ // AlleleInfo void AnalyzedLoci::addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele) throw (Exception) { bool locus_found = false; for (vector::iterator it = loci_.begin(); it != loci_.end(); it++) { if ((*it)->getName() == locus_name) { locus_found = true; try { (*it)->addAlleleInfo(allele); } catch (BadIdentifierException& bie) { throw BadIdentifierException("AnalyzedLoci::addAlleleInfoByLocusName: allele id already in use.", bie.getIdentifier()); } } } if (!locus_found) throw LocusNotFoundException("AnalyzedLoci::addAlleleInfoByLocusName: locus_name not found.", locus_name); } /******************************************************************************/ void AnalyzedLoci::addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception) { if (locus_position >= 0 && locus_position < loci_.size()) { try { loci_[locus_position]->addAlleleInfo(allele); } catch (BadIdentifierException& bie) { throw BadIdentifierException("AnalyzedLoci::addAlleleInfoByLocusPosition: allele id is already in use.", bie.getIdentifier()); } } else throw IndexOutOfBoundsException("AnalyzedLoci::addAlleleInfoByLocusPosition: locus_position out of bounds.", locus_position, 0, loci_.size()); } 
/******************************************************************************/ std::vector AnalyzedLoci::getNumberOfAlleles() const { vector allele_count; for (size_t i = 0; i < loci_.size(); i++) { allele_count.push_back(loci_[i]->getNumberOfAlleles()); } return allele_count; } /******************************************************************************/ unsigned int AnalyzedLoci::getPloidyByLocusName(const std::string& locus_name) const throw (LocusNotFoundException) { for (size_t i = 0; i < loci_.size(); i++) { if (loci_[i] != NULL && loci_[i]->getName() == locus_name) return loci_[i]->getPloidy(); } throw LocusNotFoundException("AnalyzedLoci::getLocusInfo: locus_name not found.", locus_name); } /******************************************************************************/ unsigned int AnalyzedLoci::getPloidyByLocusPosition(size_t locus_position) const throw (IndexOutOfBoundsException) { if (locus_position >= loci_.size()) throw IndexOutOfBoundsException("AnalyzedLoci::getPloidyByLocusPosition: locus_position out of bounds.", locus_position, 0, loci_.size()); return loci_[locus_position]->getPloidy(); } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/BasicAlleleInfo.h000644 000000 000000 00000006214 12147656633 021404 0ustar00rootroot000000 000000 // // File BasicAlleleInfo.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
   As a counterpart to the access to the source code and rights to copy,
   modify and redistribute granted by the license, users are provided only
   with a limited warranty and the software's author, the holder of the
   economic rights, and the successive licensors have only limited
   liability.

   In this respect, the user's attention is drawn to the risks associated
   with loading, using, modifying and/or developing or reproducing the
   software by the user in light of its specific status of free software,
   that may mean that it is complicated to manipulate, and that also
   therefore means that it is reserved for developers and experienced
   professionals having in-depth computer knowledge. Users are therefore
   encouraged to load and test the software's suitability as regards their
   requirements in conditions enabling the security of their systems and/or
   data to be ensured and, more generally, to use and operate it in the
   same conditions as regards security.

   The fact that you are presently reading this means that you have had
   knowledge of the CeCILL license and that you accept its terms.
 */

#ifndef _BASICALLELEINFO_H_
#define _BASICALLELEINFO_H_

// From local Pop
#include "AlleleInfo.h"
#include "GeneralExceptions.h"

namespace bpp
{
/**
 * @brief The BasicAlleleInfo class.
 *
 * This is the simplest allele class implementation which contains just an identifier.
 *
 * @author Sylvain Gaillard
 */
class BasicAlleleInfo :
  public AlleleInfo
{
private:
  // The identifier of the allele.
  std::string id_;

public:
  // Constructors and destructor
  /**
   * @brief Build a new allele.
   *
   * @param id The identifier of the allele.
   */
  BasicAlleleInfo(const std::string& id);

  /**
   * @brief The BasicAlleleInfo copy constructor.
   */
  BasicAlleleInfo(const BasicAlleleInfo& allele);

  virtual ~BasicAlleleInfo();

public:
  // Methods
  /**
   * @brief The assignment operator.
   */
  virtual BasicAlleleInfo& operator=(const BasicAlleleInfo& allele);

  /**
   * @brief The == operator.
   */
  virtual bool operator==(const BasicAlleleInfo& allele) const;

  /**
   * @brief The != operator.
   */
  virtual bool operator!=(const BasicAlleleInfo& allele) const;

  /**
   * @name The Clonable interface
   *
   * The declared return type is Clonable* when NO_VIRTUAL_COV is defined and
   * BasicAlleleInfo* otherwise; in both cases the returned object is a new
   * copy of this allele.
   *
   * @{
   */
#ifdef NO_VIRTUAL_COV
  Clonable*
#else
  BasicAlleleInfo*
#endif
  clone() const { return new BasicAlleleInfo(*this); }
  /** @} */

  /**
   * @name The AlleleInfo interface
   *
   * @{
   */
  void setId(const std::string& allele_id);

  const std::string& getId() const;
  /** @} */
};
} // end of namespace bpp;

#endif // _BASICALLELEINFO_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/GeneralExceptions.h000644 000000 000000 00000017644 12147656633 022060 0ustar00rootroot000000 000000
//
// File GeneralExceptions.h
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
   Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

   This software is a computer program whose purpose is to provide classes
   for population genetics analysis.

   This software is governed by the CeCILL license under French law and
   abiding by the rules of distribution of free software. You can use,
   modify and/ or redistribute the software under the terms of the CeCILL
   license as circulated by CEA, CNRS and INRIA at the following URL
   "http://www.cecill.info".

   As a counterpart to the access to the source code and rights to copy,
   modify and redistribute granted by the license, users are provided only
   with a limited warranty and the software's author, the holder of the
   economic rights, and the successive licensors have only limited
   liability.

   In this respect, the user's attention is drawn to the risks associated
   with loading, using, modifying and/or developing or reproducing the
   software by the user in light of its specific status of free software,
   that may mean that it is complicated to manipulate, and that also
   therefore means that it is reserved for developers and experienced
   professionals having in-depth computer knowledge.
   Users are therefore
   encouraged to load and test the software's suitability as regards their
   requirements in conditions enabling the security of their systems and/or
   data to be ensured and, more generally, to use and operate it in the
   same conditions as regards security.

   The fact that you are presently reading this means that you have had
   knowledge of the CeCILL license and that you accept its terms.
 */

#ifndef _GENERALEXCEPTIONS_H_
#define _GENERALEXCEPTIONS_H_

// From STL
// NOTE(review): the names of the two included headers are missing from this
// copy of the file (lost during extraction); restore them from the original
// source before building.
#include
#include

namespace bpp
{
// *****************************************************************************
/**
 * @brief The BadIdentifierException class.
 *
 * This exception is used when an identifier is not found.
 * The identifier can be either a string or an integer but its
 * value is stored as a string.
 *
 * @author Sylvain Gaillard
 */
class BadIdentifierException :
  public Exception
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  BadIdentifierException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  BadIdentifierException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  BadIdentifierException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  BadIdentifierException(const std::string& text, const std::string& id);

  // Class destructor
  ~BadIdentifierException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;

protected:
  // The identifier, stored as a string whatever its original type.
  const std::string id_;
};

// *****************************************************************************
/**
 * @brief The LocusNotFoundException class.
 *
 * A BadIdentifierException subclass declaring the same four constructors
 * and redeclaring getIdentifier().
 */
class LocusNotFoundException :
  public BadIdentifierException
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  LocusNotFoundException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  LocusNotFoundException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  LocusNotFoundException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  LocusNotFoundException(const std::string& text, const std::string& id);

  // Class destructor
  ~LocusNotFoundException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;
};

// *****************************************************************************
/**
 * @brief The AlleleNotFoundException class.
 *
 * A BadIdentifierException subclass declaring the same four constructors
 * and redeclaring getIdentifier().
 */
class AlleleNotFoundException :
  public BadIdentifierException
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  AlleleNotFoundException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  AlleleNotFoundException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  AlleleNotFoundException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  AlleleNotFoundException(const std::string& text, const std::string& id);

  // Class destructor
  ~AlleleNotFoundException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;
};

// *****************************************************************************
/**
 * @brief The LocalityNotFoundException class.
 *
 * A BadIdentifierException subclass declaring the same four constructors
 * and redeclaring getIdentifier().
 */
class LocalityNotFoundException :
  public BadIdentifierException
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  LocalityNotFoundException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  LocalityNotFoundException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  LocalityNotFoundException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  LocalityNotFoundException(const std::string& text, const std::string& id);

  // Class destructor
  ~LocalityNotFoundException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;
};

// *****************************************************************************
/**
 * @brief The IndividualNotFoundException class.
 *
 * A BadIdentifierException subclass declaring the same four constructors
 * and redeclaring getIdentifier().
 */
class IndividualNotFoundException :
  public BadIdentifierException
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  IndividualNotFoundException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  IndividualNotFoundException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  IndividualNotFoundException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  IndividualNotFoundException(const std::string& text, const std::string& id);

  // Class destructor
  ~IndividualNotFoundException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;
};

// *****************************************************************************
/**
 * @brief The GroupNotFoundException class.
 *
 * A BadIdentifierException subclass declaring the same four constructors
 * and redeclaring getIdentifier().
 */
class GroupNotFoundException :
  public BadIdentifierException
{
public:
  // Class constructor
  /**
   * @brief Build the exception with a numerical identifier.
   */
  GroupNotFoundException(const char* text, size_t id);

  /**
   * @brief Build the exception with a numerical identifier.
   */
  GroupNotFoundException(const std::string& text, size_t id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  GroupNotFoundException(const char* text, const std::string& id);

  /**
   * @brief Build the exception with a textual identifier.
   */
  GroupNotFoundException(const std::string& text, const std::string& id);

  // Class destructor
  ~GroupNotFoundException() throw ();

public:
  /**
   * @brief Return the value of the identifier as a string.
   */
  virtual const std::string getIdentifier() const;
};
} // end of namespace bpp;

#endif // _GENERALEXCEPTIONS_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/Individual.h000644 000000 000000 00000034046 12147656633 020524 0ustar00rootroot000000 000000
//
// File Individual.h
// Author : Sylvain Gaillard
// Last modification : Tuesday August 03 2004
//

/*
   Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

   This software is a computer program whose purpose is to provide classes
   for population genetics analysis.

   This software is governed by the CeCILL license under French law and
   abiding by the rules of distribution of free software. You can use,
   modify and/ or redistribute the software under the terms of the CeCILL
   license as circulated by CEA, CNRS and INRIA at the following URL
   "http://www.cecill.info".

   As a counterpart to the access to the source code and rights to copy,
   modify and redistribute granted by the license, users are provided only
   with a limited warranty and the software's author, the holder of the
   economic rights, and the successive licensors have only limited
   liability.

   In this respect, the user's attention is drawn to the risks associated
   with loading, using, modifying and/or developing or reproducing the
   software by the user in light of its specific status of free software,
   that may mean that it is complicated to manipulate, and that also
   therefore means that it is reserved for developers and experienced
   professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _INDIVIDUAL_H_ #define _INDIVIDUAL_H_ // From STL #include #include #include #include #include // From SeqLib #include #include #include #include // From PopGenLib #include "Locality.h" #include "Date.h" #include "MultilocusGenotype.h" #include "GeneralExceptions.h" namespace bpp { /** * @brief The Individual class. * * This class is designed to store data on a single individual. * This individual has only one sequence for each locus ... no information * about diploid sequence data. * See the no more in use MultiSeqIndividual documentation for an alternative. * * @author Sylvain Gaillard */ class Individual { protected: std::string id_; unsigned short sex_; std::auto_ptr date_; std::auto_ptr< Point2D > coord_; const Locality* locality_; std::auto_ptr sequences_; std::auto_ptr genotype_; public: // Constructors and destructor : /** * @brief Build a void new Individual. */ Individual(); /** * @brief Build a new Individual with an identifier. */ Individual(const std::string& id); /** * @brief Build a new Individual with parameters. * * @param id The id of the Individual as a string. * @param date The date of the Individual as a Date object. * @param coord The coordinates of the Individual as a Point2D object. * @param locality The locality of the Individual as a pointer to a Locality * object. * @param sex The sex of the Individual as an unsigned short. */ Individual(const std::string& id, const Date& date, const Point2D& coord, Locality* locality, const unsigned short sex); /** * @brief The Individual copy constructor. 
*/ Individual(const Individual& ind); /** * @brief Destroy an Individual. */ virtual ~Individual(); public: // Methods /** * @brief The Individual copy operator. * * @return A ref toward the assigned Individual. * Make a copy of each atribute of the Individual. */ Individual& operator=(const Individual& ind); /** * @brief Set the id of the Individual. * * @param id The id of the Individual as a string. */ void setId(const std::string& id); /** * @brief Get the id of the Individual. * * @return The id of the Individual as a string. */ const std::string& getId() const { return id_; } /** * @brief Set the sex of the Individual. * * @param sex An unsigned short coding for the sex. */ void setSex(const unsigned short sex); /** * @brief Get the sex of the Individual. * * @return The sex of the Individual as an unsigned short. */ unsigned short getSex() const { return sex_; } /** * @brief Set the date of the Individual. * * @param date The date as a Date object. */ void setDate(const Date& date); /** * @brief Get the date of the Individual. * * @return A pointer toward a Date object if the Individual has a date. * Otherwise throw a NullPointerException. */ const Date& getDate() const throw (NullPointerException); /** * @brief Tell if this Individual has a date. */ bool hasDate() const; /** * @brief Set the coodinates of the Individual. * * @param coord A Point2D object. */ void setCoord(const Point2D& coord); /** * @brief Set the coordinates of the Individual. * * @param x The X coordinate as a double. * @param y The Y coordinate as a double. */ void setCoord(const double x, const double y); /** * @brief Get the coordinates of the Induvidual. * * @return A pointer toward a Point2D object if the Individual has * coordinates. Otherwise throw a NullPointerException. */ const Point2D& getCoord() const throw (NullPointerException); /** * @brief Tell if this Individual has coordinates. */ bool hasCoord() const; /** * @brief Set the X coordinate of the Individual. 
* * @param x The X coordinate as a double. * * Set the X coordinate if the Individual has coordinates. * Otherwise throw a NullPointerException. */ void setX(const double x) throw (NullPointerException); /** * @brief Set the Y coordinate of th Individual. * * @param y The Y coordinate as a double. * * Set the Y coordinate if the Individual has coordinates. * Otherwise throw a NullPointerException. */ void setY(const double y) throw (NullPointerException); /** * @brief Get the X coordinate of the Individual. * * @return The X coordinate as a double if the Individual has coordinates. * Otherwise throw a NullPointerException. */ double getX() const throw (NullPointerException); /** * @brief Get the Y coordinate of the Individual. * * @return The Y coordinate as a double if the Individual has coordinates. * Otherwise throw a NullPointerException. */ double getY() const throw (NullPointerException); /** * @brief Set the locality of the Individual. * * @param locality A pointer to a Locality object. */ void setLocality(const Locality* locality); /** * @brief Get the locality of the Individual. * * @return A pointer to the Locality of the Individual. */ const Locality* getLocality() const throw (NullPointerException); /** * @brief Tell if this Individual has a locality. */ bool hasLocality() const; /** * @brief Add a sequence to the Individual. * * Creates the sequence container when adding the first sequence. * Otherwize add the sequence to the end of the sequence container. * * @param sequence_key the place where the sequence will be put. * @param sequence The sequence to add. * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet. * @throw BadIdentifierException if sequence's name is already in use. * @throw BadIntegerException if sequence_position is already in use. */ void addSequence(size_t sequence_key, const Sequence& sequence) throw (Exception); /** * @brief Get a sequence by its name. 
* * @param sequence_name The name of the sequence. * @return A reference to the sequence. * @throw NullPointerException if there is no sequence container defined. * @throw SequenceNotFoundException if sequence_name is not found. */ const Sequence& getSequenceByName(const std::string& sequence_name) const throw (Exception); /** * @brief Get a sequence by its position. * * @param sequence_position The position of the sequence in the sequence set. * @return A reference to the sequence. * @throw NullPointerException if there is no sequence container defined. * @throw SequenceNotFoundException if sequence_position is not found (i.e. missing data or not used). */ const Sequence& getSequenceAtPosition(const size_t sequence_position) const throw (Exception); /** * @brief Delete a sequence. * * @param sequence_name The name of the sequence. * @throw NullPointerException if there is no sequence container defined. * @throw SequenceNotFoundException if sequence_name is not found. */ void deleteSequenceByName(const std::string& sequence_name) throw (Exception); /** * @brief Delete a sequence. * * @param sequence_position The position of the sequence. * @throw NullPointerException if there is no sequence container defined. * @throw SequenceNotFoundException if sequence_postion is not found. */ void deleteSequenceAtPosition(size_t sequence_position) throw (Exception); /** * @brief Tell if the Individual has some sequences. * * @return TRUE if the individual has at least one sequence. * @return FALSE if the container is empty or undifined. */ bool hasSequences() const; /** * @brief Tell if the Individual has a sequence at a given position. */ bool hasSequenceAtPosition(size_t position) const; /** * @brief Return the alphabet of the sequences. * * @throw NullPointerException if there is no sequence container defined. */ const Alphabet* getSequenceAlphabet() const throw (NullPointerException); /** * @brief Get the sequences' names. 
* * @return All the sequences' names of the individual in a vector of strings. * @throw NullPointerException if there is no sequence container defined. */ std::vector getSequencesNames() const throw (NullPointerException); /** * @brief Get the sequences' positions. * * @return All the positions where a sequence is found. * @throw NullPointerException if there is no sequence container defined. */ std::vector getSequencesPositions() const throw (NullPointerException); /** * @brief Get the position of a sequence. * * @throw NullPointerException if there is no sequence container defined. * @throw SequenceNotFoundException if sequence_name is not found. */ size_t getSequencePosition(const std::string& sequence_name) const throw (Exception); /** * @brief Get the number of sequences. */ size_t getNumberOfSequences() const; /** * @brief Set all the sequences with a MapSequenceContainer. */ void setSequences(const MapSequenceContainer& msc); /** * @brief Get a reference to the sequence container. * * @throw NullPointerException if there is no sequence container defined. */ const OrderedSequenceContainer& getSequences() const throw (NullPointerException); /** * @brief Set a genotype. * * @param genotype The MultilocusGenotype which will be copied. */ void setGenotype(const MultilocusGenotype& genotype); /** * @brief Initialize the genotype. * * @throw Exception if the Individual already has a Genotype. * @throw BadIntegerException if loci_number < 1. */ void initGenotype(size_t loci_number) throw (Exception); /** * @brief Get the genotype. * * @throw NullPointerException listed in the exception specification; presumably raised when there is no genotype defined (TODO confirm against the implementation). */ const MultilocusGenotype& getGenotype() const throw (NullPointerException); /** * @brief Delete the genotype of the individual. */ void deleteGenotype(); /** * @brief Tell if the Individual has a MultilocusGenotype. */ bool hasGenotype() const; /** * @brief Set a MonolocusGenotype. * * @throw NullPointerException if there is no genotype defined. * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
*/ void setMonolocusGenotype(size_t locus_position, const MonolocusGenotype& monogen) throw (Exception); /** * @brief Set a MonolocusGenotype from allele keys. * * @throw NullPointerException if there is no genotype defined. * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci. * @throw Exception if there is no key in allele_keys. */ void setMonolocusGenotypeByAlleleKey(size_t locus_position, const std::vector allele_keys) throw (Exception); /** * @brief Set a MonolocusGenotype from allele ids. * * @throw NullPointerException if there is no genotype defined. * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci. * @throw AlleleNotFoundException if at least one of the ids is not found in the LocusInfo. */ void setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector allele_id, const LocusInfo& locus_info) throw (Exception); /** * @brief Get a MonolocusGenotype. * * @throw NullPointerException if there is no genotype defined. * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci. */ const MonolocusGenotype& getMonolocusGenotype(size_t locus_position) throw (Exception); /** * @brief Count the number of non missing MonolocusGenotype. * * @throw NullPointerException if there is no genotype defined. */ size_t countNonMissingLoci() const throw (NullPointerException); /** * @brief Count the number of homozygous MonolocusGenotype. * * @throw NullPointerException if there is no genotype defined. */ size_t countHomozygousLoci() const throw (NullPointerException); /** * @brief Count the number of heterozygous MonolocusGenotype. * * @throw NullPointerException if there is no genotype defined.
*/ size_t countHeterozygousLoci() const throw (NullPointerException); }; } // end of namespace bpp; #endif // _INDIVIDUAL_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismMultiGContainerTools.h000644 000000 000000 00000011177 12147656633 025144 0ustar00rootroot000000 000000 // // File PolymorphismMultiGContainerTools.h // Authors : Sylvain Gailard // Khalid Belkhir // Last modification : june 15 2006 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #ifndef _POLYMORPHISMMULTIGCONTAINERTOOLS_H_ #define _POLYMORPHISMMULTIGCONTAINERTOOLS_H_ // From the STL #include // From the PolGenLib library #include "PolymorphismMultiGContainer.h" #include namespace bpp { /** * @brief Tools for PolymorphismMultiGContainer. * * Provides static methods for permutations. * * @author Sylvain Gaillard */ class PolymorphismMultiGContainerTools { public: /** * @brief Permute the MultilocusGenotype in the whole PolymorphismMultiGContainer. * * @param pmgc The PolymorphismMultiGContainer to permute. * @return A permuted PolymorphismMultiGContainer. */ static PolymorphismMultiGContainer permutMultiG(const PolymorphismMultiGContainer& pmgc); /** * @brief Permute the MonolocusGenotype. * * Permute the MonolocusGenotypes in one or several groups breaking * the links between them. * * @param pmgc The PolymorphismMultiGContainer to permute. * @param groups The groups ids between which the MonolocusGenotypes will be permuted. * @return A permuted PolymorphismMultiGContainer. */ static PolymorphismMultiGContainer permutMonoG(const PolymorphismMultiGContainer& pmgc, const std::set& groups); /** * @brief Permute the MonolocusGenotype between individuals in the same group. * * Permute the MonolocusGenotypes for a set of groups. The individuals of the other groups * are kept intact. * * @param pmgc The PolymorphismMultiGContainer to permute. * @param groups The groups ids for which the MonolocusGenotypes will be permuted. * @return A permuted PolymorphismMultiGContainer. */ static PolymorphismMultiGContainer permutIntraGroupMonoG(const PolymorphismMultiGContainer& pmgc, const std::set& groups); /** * @brief Permute the Alleles. * * Permute the alleles in one or several groups breaking * the links between them. * * @param pmgc The PolymorphismMultiGContainer to permute. * @param groups The groups ids between which the alleles will be permuted. * @return A permuted PolymorphismMultiGContainer.
*/ static PolymorphismMultiGContainer permutAlleles(const PolymorphismMultiGContainer& pmgc, const std::set& groups); /** * @brief Permute the Alleles between individuals in the same group. * * Permute the alleles in one or several groups. * * @param pmgc The PolymorphismMultiGContainer to permute. * @param groups The groups ids for which the alleles will be permuted. * @return A permuted PolymorphismMultiGContainer. */ static PolymorphismMultiGContainer permutIntraGroupAlleles(const PolymorphismMultiGContainer& pmgc, const std::set& groups); /** * @brief Extract groups from a PolymorphismMultiGContainer. * * NOTE(review): undocumented in the original; presumably returns a container holding only the genotypes whose group id is listed in groups - confirm against the implementation. * * @param pmgc The source PolymorphismMultiGContainer. * @param groups The groups ids to extract. */ static PolymorphismMultiGContainer extractGroups(const PolymorphismMultiGContainer& pmgc, const std::set& groups); }; } // end of namespace bpp; #endif // _POLYMORPHISMMULTIGCONTAINERTOOLS_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismSequenceContainer.h000644 000000 000000 00000027150 12147656633 024470 0ustar00rootroot000000 000000 // // File: PolymorphismSequenceContainer.h // Authors: Eric Bazin // Sylvain Gaillard // Created on: Wednesday August 04 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _POLYMORPHISMSEQUENCECONTAINER_H_ #define _POLYMORPHISMSEQUENCECONTAINER_H_ #include #include #include #include #include #include #include /** * @mainpage * * @par * The PopGenLib library provides classes for population genetics analysis. * It makes intensive use of the SeqLib library, and adds a dedicated container * named bpp::PolymorphismSequenceContainer, which associates frequencies to the * sequences in the set. The bpp::PolymorphismSequenceContainerTools and * bpp::SequenceStatistics static classes provide several tools for data analysis, * including diversity indices and positive selection tests. * * @section dataset Population and sample data storage and manipulation * * @par * PopGenLib library provides data structure for handling sample and data sets * for population genetics. * These objects are embedded in the bpp::DataSet object which is a container of bpp::Group * of bpp::Individual. * Each bpp::Individual can store bpp::Sequence data or allelic data with the dedicated * classes bpp::MultilocusGenotype. 
* * @section genetics Population genetics data and statistics * * @par * To compute statistics on data, two container families are provided, one for sequences * (bpp::PolymorphismSequenceContainer) and the other for allelic data (bpp::PolymorphismMultiGContainer). * Static tools classes for both families are provided to compute several common or less * common statistics. * * @section statistics Statistics overview * * @par heterozygosity * @par watterson75 Diversity estimator Theta of Watterson * @par tajima83 Diversity estimator Theta of Tajima * @par DVH Haplotype diversity of Depaulis and Veuille * @par D Tajima's D test */ namespace bpp { /** * @brief The PolymorphismSequenceContainer class. * * This is a VectorSiteContainer with a count ("effectif") for each sequence. * It also has a flag for ingroup and outgroup. * * @author Sylvain Gaillard */ class PolymorphismSequenceContainer : public VectorSiteContainer { private: std::vector ingroup_; std::vector count_; std::vector group_; public: // Constructors and destructor /** * @brief Build a new empty PolymorphismSequenceContainer. */ PolymorphismSequenceContainer(const Alphabet* alpha); /** * @brief Build a new empty PolymorphismSequenceContainer of given size. */ PolymorphismSequenceContainer(size_t size, const Alphabet* alpha); /** * @brief Build a PolymorphismSequenceContainer by copying data from an OrderedSequenceContainer. */ PolymorphismSequenceContainer(const OrderedSequenceContainer& sc); /** * @brief Build a PolymorphismSequenceContainer by copying data from a SiteContainer. */ PolymorphismSequenceContainer(const SiteContainer& sc); /** * @brief Copy constructor. */ PolymorphismSequenceContainer(const PolymorphismSequenceContainer& psc); /** * @brief Operator= : copy operator. */ PolymorphismSequenceContainer& operator=(const PolymorphismSequenceContainer& psc); /** * @brief Destroy a PolymorphismSequenceContainer. */ virtual ~PolymorphismSequenceContainer(); /** * @brief Clone a PolymorphismSequenceContainer.
*/ PolymorphismSequenceContainer* clone() const { return new PolymorphismSequenceContainer(*this); } public: // Other methods /** * @brief Remove a sequence by index and return a pointer to this removed sequence. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences. */ Sequence* removeSequence(size_t index) throw (IndexOutOfBoundsException); /** * @brief Remove a sequence by name and return a pointer to this removed sequence. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException); /** * @brief Delete a sequence by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences. */ void deleteSequence(size_t index) throw (IndexOutOfBoundsException); /** * @brief Delete a sequence by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ void deleteSequence(const std::string& name) throw (SequenceNotFoundException); /** * @brief Add a sequence to the container. * * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet. * @throw SequenceException if the sequence's size doesn't match the sequence's size of the container. * @throw SequenceException if the sequence's name already exists in the container. */ void addSequence(const Sequence& sequence, size_t effectif = 1, bool checkNames = true) throw (Exception); /** * @brief Clear the container of all its sequences. */ void clear(); /** * @brief Get the group identifier of the sequence. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ size_t getGroupId(size_t index) const throw (IndexOutOfBoundsException); /** * @brief Get the group identifier of a sequence. * * @throw SequenceNotFoundException if name is not found among the sequences' names.
*/ size_t getGroupId(const std::string& name) const throw (SequenceNotFoundException); /** * @brief Get all the groups identifiers. */ std::set getAllGroupsIds() const; /** * @brief Set the group identifier of a sequence. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ void setGroupId(size_t index, size_t group_id) throw (IndexOutOfBoundsException); /** * @brief Set the group identifier of a sequence. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ void setGroupId(const std::string& name, size_t group_id) throw (SequenceNotFoundException); /** * @brief Get the number of groups. */ size_t getNumberOfGroups() const; /** * @brief Tell if the sequence is ingroup by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ bool isIngroupMember(size_t index) const throw (IndexOutOfBoundsException); /** * @brief Tell if a sequence is ingroup by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ bool isIngroupMember(const std::string& name) const throw (SequenceNotFoundException); /** * @brief Set a sequence as ingroup member by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ void setAsIngroupMember(size_t index) throw (IndexOutOfBoundsException); /** * @brief Set a sequence as ingroup member by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ void setAsIngroupMember(const std::string& name) throw (SequenceNotFoundException); /** * @brief Set a sequence as outgroup member by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ void setAsOutgroupMember(size_t index) throw (IndexOutOfBoundsException); /** * @brief Set a sequence as outgroup member by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names.
*/ void setAsOutgroupMember(const std::string& name) throw (SequenceNotFoundException); /** * @brief Set the count of a sequence by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ void setSequenceCount(size_t index, size_t count) throw (Exception); /** * @brief Set the count of a sequence by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ void setSequenceCount(const std::string& name, size_t count) throw (Exception); /** * @brief Add 1 to the sequence count. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. */ void incrementSequenceCount(size_t index) throw (IndexOutOfBoundsException); /** * @brief Add 1 to the sequence count. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ void incrementSequenceCount(const std::string& name) throw (SequenceNotFoundException); /** * @brief Remove 1 from the sequence count. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ void decrementSequenceCount(size_t index) throw (Exception); /** * @brief Remove 1 from the sequence count. * * @throw SequenceNotFoundException if name is not found among the sequences' names. * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0. */ void decrementSequenceCount(const std::string& name) throw (Exception); /** * @brief Get the count of a sequence by index. * * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
*/ size_t getSequenceCount(size_t index) const throw (IndexOutOfBoundsException); /** * @brief Get the count of a sequence by name. * * @throw SequenceNotFoundException if name is not found among the sequences' names. */ size_t getSequenceCount(const std::string& name) const throw (SequenceNotFoundException); }; } // end of namespace bpp; #endif // _POLYMORPHISMSEQUENCECONTAINER_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/AbstractIDataSet.cpp000644 000000 000000 00000004532 12147656633 022106 0ustar00rootroot000000 000000 // // File AbstractIDataSet.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "AbstractIDataSet.h" using namespace bpp; // From STL #include using namespace std; AbstractIDataSet::~AbstractIDataSet() {} void AbstractIDataSet::read(const std::string& path, DataSet& data_set) throw (Exception) { ifstream input(path.c_str(), ios::in); read(input, data_set); input.close(); } DataSet* AbstractIDataSet::read(std::istream& is) throw (Exception) { DataSet* data_set = new DataSet(); read(is, *data_set); return data_set; } DataSet* AbstractIDataSet::read(const std::string& path) throw (Exception) { DataSet* data_set = new DataSet(); read(path, *data_set); return data_set; } bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp000644 000000 000000 00000025333 12147656633 024455 0ustar00rootroot000000 000000 // // File PolymorphismMultiGContainer.cpp // Author : Sylvain Gaillard // Khalid Belkhir // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "PolymorphismMultiGContainer.h" using namespace bpp; using namespace std; // ** Constructors : **********************************************************/ PolymorphismMultiGContainer::PolymorphismMultiGContainer() : multilocusGenotypes_(std::vector()), groups_(std::vector()), groups_names_(std::map()) {} PolymorphismMultiGContainer::PolymorphismMultiGContainer(const PolymorphismMultiGContainer& pmgc) : multilocusGenotypes_(std::vector(pmgc.size())), groups_(std::vector(pmgc.size())), groups_names_(std::map()) { for (size_t i = 0; i < pmgc.size(); i++) { multilocusGenotypes_[i] = new MultilocusGenotype(*pmgc.getMultilocusGenotype(i)); groups_[i] = pmgc.getGroupId(i); } set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); groups_names_[id] = name; } } // ** Destructor : ************************************************************/ PolymorphismMultiGContainer::~PolymorphismMultiGContainer() { clear(); } // ** Other methodes : ********************************************************/ PolymorphismMultiGContainer& PolymorphismMultiGContainer::operator=(const PolymorphismMultiGContainer& pmgc) { clear(); for (size_t i = 0; i < pmgc.size(); i++) { multilocusGenotypes_.push_back(new MultilocusGenotype(*pmgc.getMultilocusGenotype(i))); groups_.push_back(pmgc.getGroupId(i)); } set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); groups_names_[id] = name; } return *this; } /******************************************************************************/ void PolymorphismMultiGContainer::addMultilocusGenotype(const MultilocusGenotype& mg, size_t group) { multilocusGenotypes_.push_back(new MultilocusGenotype(mg)); groups_.push_back(group); map::const_iterator it = groups_names_.find(group); if (!(it != groups_names_.end()) ) { // ajouter ce groupe avec 
un nom vide groups_names_[group] = ""; } } /******************************************************************************/ const MultilocusGenotype* PolymorphismMultiGContainer::getMultilocusGenotype(size_t position) const throw (IndexOutOfBoundsException) { if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getMultilocusGenotype: position out of bounds.", position, 0, size() - 1); return multilocusGenotypes_[position]; } /******************************************************************************/ MultilocusGenotype* PolymorphismMultiGContainer::removeMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException) { if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::removeMultilocusGenotype: position out of bounds.", position, 0, size() - 1); MultilocusGenotype* tmp_mg = multilocusGenotypes_[position]; multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position); groups_.erase(groups_.begin() + position); return tmp_mg; } /******************************************************************************/ void PolymorphismMultiGContainer::deleteMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException) { if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::deleteMultilocusGenotype: position out of bounds.", position, 0, size() - 1); delete multilocusGenotypes_[position]; multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position); groups_.erase(groups_.begin() + position); } /******************************************************************************/ bool PolymorphismMultiGContainer::isAligned() const { size_t value = 0; for (size_t i = 0; i < size(); i++) { if (i == 0) value = multilocusGenotypes_[i]->size(); else if (multilocusGenotypes_[i]->size() != value) return false; } return true; } /******************************************************************************/ size_t PolymorphismMultiGContainer::getNumberOfLoci() 
const throw (Exception) { if (!isAligned()) throw Exception("MultilocusGenotypes are not aligned."); if (size() < 1) return 0; return multilocusGenotypes_[0]->size(); } /******************************************************************************/ size_t PolymorphismMultiGContainer::getGroupId(size_t position) const throw (IndexOutOfBoundsException) { if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getGroupId: position out of bounds.", position, 0, size() - 1); return groups_[position]; } /******************************************************************************/ void PolymorphismMultiGContainer::setGroupId(size_t position, size_t group_id) throw (IndexOutOfBoundsException) { if (position >= size()) throw IndexOutOfBoundsException("PolymorphismMultiGContainer::setGroupId: position out of bounds.", position, 0, size() - 1); groups_[position] = group_id; } /******************************************************************************/ std::set PolymorphismMultiGContainer::getAllGroupsIds() const { set groups_ids; for (size_t i = 0; i < size(); i++) { groups_ids.insert(groups_[i]); } return groups_ids; } /******************************************************************************/ std::vector PolymorphismMultiGContainer::getAllGroupsNames() const { vector grps_names; map::const_iterator it; for (it = groups_names_.begin(); it != groups_names_.end(); it++) { string name = it->second; if (!name.empty()) grps_names.push_back(name); else grps_names.push_back(TextTools::toString(it->first) ); } return grps_names; } /******************************************************************************/ bool PolymorphismMultiGContainer::groupExists(size_t group) const { for (size_t i = 0; i < size(); i++) { if (groups_[i] == group) return true; } return false; } /******************************************************************************/ size_t PolymorphismMultiGContainer::getNumberOfGroups() const { return 
getAllGroupsIds().size(); } /******************************************************************************/ size_t PolymorphismMultiGContainer::getGroupSize(size_t group) const { size_t counter = 0; for (size_t i = 0; i < size(); i++) { if (groups_[i] == group) counter++; } return counter; } /******************************************************************************/ std::string PolymorphismMultiGContainer::getGroupName(size_t group_id) const throw (GroupNotFoundException) { string name = TextTools::toString(group_id); // par defaut on retourne le n° de groupe map::const_iterator it = groups_names_.find(group_id); if (it != groups_names_.end() ) name = it->second; else throw GroupNotFoundException("PolymorphismMultiGContainer::getGroupName: group not found.", group_id); return name; } /******************************************************************************/ void PolymorphismMultiGContainer::setGroupName(size_t group_id, std::string name) throw (GroupNotFoundException) { map::iterator it = groups_names_.find(group_id); if (it != groups_names_.end() ) it->second = name; else throw GroupNotFoundException("PolymorphismMultiGContainer::getGroupName: group not found.", group_id); return; } /******************************************************************************/ void PolymorphismMultiGContainer::addGroupName(size_t group_id, std::string name) { groups_names_[group_id] = name; return; } /******************************************************************************/ size_t PolymorphismMultiGContainer::getLocusGroupSize(size_t group, size_t locus_position) const { size_t counter = 0; for (size_t i = 0; i < size(); i++) { try { if (groups_[i] == group && !multilocusGenotypes_[i]->isMonolocusGenotypeMissing(locus_position)) counter++; } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getGroupSize: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } 
return counter; } /******************************************************************************/ size_t PolymorphismMultiGContainer::size() const { return multilocusGenotypes_.size(); } /******************************************************************************/ void PolymorphismMultiGContainer::clear() { for (size_t i = 0; i < multilocusGenotypes_.size(); i++) { delete multilocusGenotypes_[i]; } multilocusGenotypes_.clear(); groups_.clear(); groups_names_.clear(); } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/DarwinVarSingle.cpp000644 000000 000000 00000010124 12147656633 022015 0ustar00rootroot000000 000000 // // File DarwinVarSingle.cpp // Authors : Sylvain Gaillard // Last modification : April 7, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (April 7, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "DarwinVarSingle.h" using namespace bpp; using namespace std; DarwinVarSingle::DarwinVarSingle(size_t missingData) : missingData_(missingData) {} DarwinVarSingle::~DarwinVarSingle() {} void DarwinVarSingle::write(ostream& os, const DataSet& data_set) const throw (Exception) { if (!os) throw IOException("DarwinVarSingle::write: fail to open stream."); StlOutputStreamWrapper out(&os); (out << "@DARwin 5.0 - SINGLE").endLine(); size_t ind_nbr = 0; for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { ind_nbr += data_set.getNumberOfIndividualsInGroup(i); } vector header; header.push_back("Unit"); for (size_t i = 0; i < data_set.getNumberOfLoci(); i++) { const LocusInfo& li = data_set.getLocusInfoAtPosition(i); for (size_t j = 0; j < li.getNumberOfAlleles(); j++) { header.push_back(li.getName() + "." 
+ li.getAlleleInfoByKey(j).getId()); } } size_t var_nbr = header.size() - 1; // header.push_back("Name"); (out << ind_nbr << "\t" << var_nbr).endLine(); VectorTools::print(header, out, "\t"); // size_t ind_index = 0; const AnalyzedLoci* al = data_set.getAnalyzedLoci(); for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { size_t ind_nbr_ig = data_set.getNumberOfIndividualsInGroup(i); for (size_t j = 0; j < ind_nbr_ig; j++) { vector var; const MultilocusGenotype& geno = data_set.getIndividualAtPositionFromGroup(i, j)->getGenotype(); for (size_t k = 0; k < geno.size(); k++) { const MonolocusGenotype& mg = geno.getMonolocusGenotype(k); if (geno.isMonolocusGenotypeMissing(k)) { for (size_t l = 0; l < al->getNumberOfAlleles()[k]; l++) { var.push_back(missingData_); } } else { for (size_t l = 0; l < al->getNumberOfAlleles()[k]; l++) { size_t flag = 0; if (VectorTools::contains(mg.getAlleleIndex(), l)) flag = 1; var.push_back(flag); } } // var.push_back((mg->getAlleleIndex()).size()); } (out << j + (i * ind_nbr_ig) + 1 << "\t" << VectorTools::paste(var, "\t")).endLine(); } } } void DarwinVarSingle::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception) { AbstractODataSet::write(path, data_set, overwrite); } bpp-popgen-2.1.0/src/Bpp/PopGen/MultilocusGenotypeStatistics.cpp000644 000000 000000 00000076125 12147656633 024721 0ustar00rootroot000000 000000 /* * File MultilocusGenotypeStatistics.cpp * Author : Sylvain Gaillard * Last modification : Wednesday August 04 2004 * */ /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. 
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include #include "MultilocusGenotypeStatistics.h" #include "PolymorphismMultiGContainerTools.h" using namespace bpp; // From STL #include #include #include using namespace std; vector MultilocusGenotypeStatistics::getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { map tmp_alleles; try { tmp_alleles = getAllelesMapForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesIdsForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } return MapTools::getKeys(tmp_alleles); } size_t MultilocusGenotypeStatistics::countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { map allele_count; size_t nb_tot_allele = 0; try { allele_count = getAllelesMapForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countGametesForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } vector counter = MapTools::getValues(allele_count); for (size_t i = 0; i < counter.size(); i++) { nb_tot_allele += counter[i]; } return nb_tot_allele; } map MultilocusGenotypeStatistics::getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { map alleles_count; for (size_t i = 0; i < pmgc.size(); i++) { try { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()) ) { // if (! 
pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) ) { vector tmp_alleles = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position).getAlleleIndex(); for (size_t j = 0; j < tmp_alleles.size(); j++) { alleles_count[tmp_alleles[j]]++; } } } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesMapForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } return alleles_count; } map MultilocusGenotypeStatistics::getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map alleles_frq; size_t nb_tot_allele = 0; map tmp_alleles; try { tmp_alleles = getAllelesMapForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesFrqForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } vector counter = MapTools::getValues(tmp_alleles); for (size_t i = 0; i < counter.size(); i++) { nb_tot_allele += counter[i]; } if (nb_tot_allele == 0) throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFrqForGroups."); for (map::iterator it = tmp_alleles.begin(); it != tmp_alleles.end(); it++) { alleles_frq[it->first] = static_cast(it->second) / static_cast(nb_tot_allele); } return alleles_frq; } size_t MultilocusGenotypeStatistics::countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { size_t counter = 0; for (size_t i = 0; i < pmgc.size(); i++) { try { // if (! 
pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) ) if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i) ) != groups.end()) ) counter++; } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countNonMissing: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } return counter; } size_t MultilocusGenotypeStatistics::countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { size_t counter = 0; for (size_t i = 0; i < pmgc.size(); i++) { try { // if (! pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) ) if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()) ) if ((pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position).getAlleleIndex()).size() == 2) counter++; } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countBiAllelic: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } return counter; } map MultilocusGenotypeStatistics::countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (IndexOutOfBoundsException) { map counter; for (size_t i = 0; i < pmgc.size(); i++) { try { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end() )) { const MonolocusGenotype& tmp_mg = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position); if ((tmp_mg.getAlleleIndex()).size() == 2) { if 
(!dynamic_cast(tmp_mg).isHomozygous()) { vector tmp_alleles = tmp_mg.getAlleleIndex(); for (size_t j = 0; j < tmp_alleles.size(); j++) { counter[tmp_alleles[j]]++; } } } } } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countHeterozygous: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } return counter; } map MultilocusGenotypeStatistics::getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map freq; size_t counter = 0; for (size_t i = 0; i < pmgc.size(); i++) { try { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()) ) { const MonolocusGenotype& tmp_mg = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position); if ((tmp_mg.getAlleleIndex()).size() == 2) { counter++; if (!dynamic_cast(tmp_mg).isHomozygous()) { vector tmp_alleles = tmp_mg.getAlleleIndex(); for (size_t j = 0; j < tmp_alleles.size(); j++) { freq[tmp_alleles[j]]++; } } } } } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } if (counter == 0) throw ZeroDivisionException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups."); for (map::iterator i = freq.begin(); i != freq.end(); i++) { i->second = (double) i->second / (double) counter; } return freq; } double MultilocusGenotypeStatistics::getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map heterozygous_frq; double frq = 0.; try { heterozygous_frq = getHeterozygousFrqForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw 
IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHobsForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (ZeroDivisionException& zde) { throw ZeroDivisionException("MultilocusGenotypeStatistics::getHobsForGroups."); } for (map::iterator it = heterozygous_frq.begin(); it != heterozygous_frq.end(); it++) { frq += it->second; } return frq / static_cast(heterozygous_frq.size()); } double MultilocusGenotypeStatistics::getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map allele_frq; double frqsqr = 0.; try { allele_frq = getAllelesFrqForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHexpForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (ZeroDivisionException& zde) { throw ZeroDivisionException("MultilocusGenotypeStatistics::getHexpForGroups."); } for (map::iterator it = allele_frq.begin(); it != allele_frq.end(); it++) { frqsqr += it->second * it->second; } return 1 - frqsqr; } double MultilocusGenotypeStatistics::getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { size_t nb_alleles; double Hexp; try { nb_alleles = countGametesForGroups(pmgc, locus_position, groups); Hexp = getHexpForGroups(pmgc, locus_position, groups); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHnbForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (ZeroDivisionException& zde) { throw ZeroDivisionException("MultilocusGenotypeStatistics::getHnbForGroups."); } return 2 * static_cast(nb_alleles) * Hexp / static_cast((2 * nb_alleles) - 1); } double MultilocusGenotypeStatistics::getDnei72(const 
PolymorphismMultiGContainer& pmgc, vector locus_positions, size_t grp1, size_t grp2) throw (Exception) { map allele_frq1, allele_frq2; vector allele_ids; set group1_id; set group2_id; set groups_id; double Jx = 0.; double Jy = 0.; double Jxy = 0.; group1_id.insert(grp1); group2_id.insert(grp2); groups_id.insert(grp1); groups_id.insert(grp2); for (size_t i = 0; i < locus_positions.size(); i++) { allele_ids.clear(); allele_frq1.clear(); allele_frq2.clear(); try { allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id); allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id); allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id); } catch (Exception& e) { throw e; } for (size_t j = 0; j < allele_ids.size(); j++) { map::iterator it1 = allele_frq1.find(allele_ids[j]); map::iterator it2 = allele_frq2.find(allele_ids[j]); double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.; double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.; Jx += tmp_frq1 * tmp_frq1; Jy += tmp_frq2 * tmp_frq2; Jxy += tmp_frq1 * tmp_frq2; } } if (Jx * Jy == 0.) 
throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei72."); return -log(Jxy / sqrt(Jx * Jy)); } double MultilocusGenotypeStatistics::getDnei78(const PolymorphismMultiGContainer& pmgc, vector locus_positions, size_t grp1, size_t grp2) throw (Exception) { map allele_frq1, allele_frq2; vector allele_ids; set group1_id; set group2_id; set groups_id; double Jx = 0.; double Jy = 0.; double Jxy = 0.; size_t nx = 0, ny = 0; group1_id.insert(grp1); group2_id.insert(grp2); groups_id.insert(grp1); groups_id.insert(grp2); for (size_t i = 0; i < locus_positions.size(); i++) { allele_ids.clear(); allele_frq1.clear(); allele_frq2.clear(); try { allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id); allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id); allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id); nx = countBiAllelicForGroups(pmgc, locus_positions[i], group1_id); ny = countBiAllelicForGroups(pmgc, locus_positions[i], group2_id); } catch (Exception& e) { throw e; } double tmp_Jx = 0.; double tmp_Jy = 0.; for (size_t j = 0; j < allele_ids.size(); j++) { map::iterator it1 = allele_frq1.find(allele_ids[j]); map::iterator it2 = allele_frq2.find(allele_ids[j]); double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.; double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.; tmp_Jx += tmp_frq1 * tmp_frq1; tmp_Jy += tmp_frq2 * tmp_frq2; Jxy += tmp_frq1 * tmp_frq2; } Jx += ((2. * (double) nx * tmp_Jx) - 1.) / ((2. * (double) nx) - 1.); Jy += ((2. * (double) ny * tmp_Jy) - 1.) / ((2. * (double) ny) - 1.); } double denom = Jx * Jy; if (denom == 0.) 
throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei78."); return -log(Jxy / sqrt(denom)); } map MultilocusGenotypeStatistics::getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map vc = getVarianceComponents(pmgc, locus_position, groups); map f_stats; for (map::iterator it = vc.begin(); it != vc.end(); it++) { double abc = it->second.a + it->second.b + it->second.c; double bc = it->second.b + it->second.c; if (abc == 0) { f_stats[it->first].Fit = NAN; f_stats[it->first].Fst = NAN; } { f_stats[it->first].Fit = 1. - it->second.c / abc; f_stats[it->first].Fst = it->second.a / abc; } if (bc == 0) f_stats[it->first].Fis = NAN; else f_stats[it->first].Fis = 1. - it->second.c / bc; } return f_stats; } map MultilocusGenotypeStatistics::getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map values = getVarianceComponents(pmgc, locus_position, groups); map Fit; for (map::iterator it = values.begin(); it != values.end(); it++) { Fit[it->first] = it->second.a + it->second.b + it->second.c; if (Fit[it->first] == 0.) throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFit."); Fit[it->first] = 1. - it->second.c / Fit[it->first]; } return Fit; } map MultilocusGenotypeStatistics::getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { if (groups.size() <= 1) throw BadIntegerException("MultilocusGenotypeStatistics::getAllelesFst: groups must be >= 2.", static_cast(groups.size())); map values = getVarianceComponents(pmgc, locus_position, groups); map Fst; for (map::iterator it = values.begin(); it != values.end(); it++) { Fst[it->first] = it->second.a + it->second.b + it->second.c; if (Fst[it->first] == 0.) 
throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFst."); Fst[it->first] = it->second.a / Fst[it->first]; } return Fst; } map MultilocusGenotypeStatistics::getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (Exception) { map values = getVarianceComponents(pmgc, locus_position, groups); map Fis; for (map::iterator it = values.begin(); it != values.end(); it++) { Fis[it->first] = it->second.b + it->second.c; if (Fis[it->first] == 0.) throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFis."); Fis[it->first] = 1. - it->second.c / Fis[it->first]; } return Fis; } map MultilocusGenotypeStatistics::getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set& groups) throw (ZeroDivisionException) { map values; // Base values computation double nbar = 0.; double nc = 0.; vector ids = getAllelesIdsForGroups(pmgc, locus_position, groups); map pbar; map s2; map hbar; for (size_t i = 0; i < ids.size(); i++) { pbar[ids[i]] = 0.; s2[ids[i]] = 0.; hbar[ids[i]] = 0.; } double r = static_cast(groups.size()); for (set::iterator set_it = groups.begin(); set_it != groups.end(); set_it++) { size_t i = (*set_it); double ni = static_cast(pmgc.getLocusGroupSize(i, locus_position)); set group_id; group_id.insert( i ); map pi = getAllelesFrqForGroups(pmgc, locus_position, group_id); map hi = getHeterozygousFrqForGroups(pmgc, locus_position, group_id); nbar += ni; if (r > 1) nc += ni * ni; for (map::iterator it = pi.begin(); it != pi.end(); it++) { pbar[it->first] += ni * it->second; } for (map::iterator it = hi.begin(); it != hi.end(); it++) { hbar[it->first] += ni * it->second; } group_id.clear(); } nbar = nbar / r; if (nbar <= 1) throw ZeroDivisionException("MultilocusGenotypeStatistics::getVarianceComponents."); if (r > 1) nc = (r * nbar) - (nc / (r * nbar)) / (r - 1.); for (map::iterator it = pbar.begin(); it != pbar.end(); it++) { it->second = it->second / (r * 
nbar); } for (map::iterator it = hbar.begin(); it != hbar.end(); it++) { it->second = it->second / ( r * nbar); } for (set::iterator set_it = groups.begin(); set_it != groups.end(); set_it++) { size_t i = (*set_it); double ni = static_cast(pmgc.getLocusGroupSize( i, locus_position)); set group_id; group_id.insert( i ); map pi = getAllelesFrqForGroups(pmgc, locus_position, group_id); for (size_t j = 0; j < ids.size(); j++) { pi[ids[j]]; } for (map::iterator it = pi.begin(); it != pi.end(); it++) { s2[it->first] += ni * (it->second - pbar[it->first]) * (it->second - pbar[it->first]); } group_id.clear(); } for (map::iterator it = s2.begin(); it != s2.end(); it++) { it->second = it->second / ((r - 1.) * nbar); } // a, b, c computation for (size_t i = 0; i < ids.size(); i++) { values[ids[i]]; } for (map::iterator it = values.begin(); it != values.end(); it++) { it->second.a = (nbar / nc) * (s2[it->first] - ((1. / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / r) - ((1. / 4.) * hbar[it->first])))); it->second.b = (nbar / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / (double) r) - ((((2. * nbar) - 1.) / (4. 
* nbar)) * hbar[it->first])); it->second.c = hbar[it->first] / 2.; } return values; } double MultilocusGenotypeStatistics::getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, vector locus_positions, const set& groups) throw (Exception) { double A, B, C; A = B = C = 0.0; for (size_t i = 0; i < locus_positions.size(); i++) { //count total number of individuals without missing data size_t ni = 0; for (set::iterator setIt = groups.begin() ; setIt != groups.end() ; setIt++) { ni += pmgc.getLocusGroupSize( (*setIt), i); } // reduce computation for polymorphic loci for that groups vector ids = getAllelesIdsForGroups(pmgc, i, groups); if (ids.size() >= 2 && ni >= 1) { map values = getVarianceComponents(pmgc, locus_positions[i], groups); for (map::iterator it = values.begin(); it != values.end(); it++) { A += it->second.a; B += it->second.b; C += it->second.c; } } } if ((A + B + C) == 0) throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFst."); return A / (A + B + C); } double MultilocusGenotypeStatistics::getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, vector locus_positions, const set& groups) throw (Exception) { double B, C; B = C = 0.0; for (size_t i = 0; i < locus_positions.size(); i++) { //count total number of individuals without missing data size_t ni = 0; for (set::iterator setIt = groups.begin() ; setIt != groups.end() ; setIt++) { ni += pmgc.getLocusGroupSize( (*setIt), i); } // reduce computation for polymorphic loci for that groups vector ids = getAllelesIdsForGroups(pmgc, i, groups); if (ids.size() >= 2 && ni >= 1) { map values = getVarianceComponents(pmgc, locus_positions[i], groups); for (map::iterator it = values.begin(); it != values.end(); it++) { B += it->second.b; C += it->second.c; } } } if ((B + C) == 0) throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFis."); return 1.0 - C / (B + C); } MultilocusGenotypeStatistics::PermResults 
MultilocusGenotypeStatistics::getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer& pmgc, vector locus_positions, set groups, int nb_perm) throw (Exception) { // extract a PolymorphismMultiGContainer with only those groups PolymorphismMultiGContainer sub_pmgc = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups); double nb_sup = 0.0; double nb_inf = 0.0; PermResults results; results.Statistic = getWCMultilocusFst(sub_pmgc, locus_positions, groups); if (nb_perm > 0) { for (int i = 0; i < nb_perm; i++) { PolymorphismMultiGContainer permuted_pmgc = PolymorphismMultiGContainerTools::permutMultiG( sub_pmgc); double Fst_perm = getWCMultilocusFst(permuted_pmgc, locus_positions, groups); // cout << Fst_perm << endl; if (Fst_perm > results.Statistic) nb_sup++; if (Fst_perm < results.Statistic) nb_inf++; } nb_sup /= (double) nb_perm; nb_inf /= (double) nb_perm; } results.Percent_sup = nb_sup; results.Percent_inf = nb_inf; return results; } MultilocusGenotypeStatistics::PermResults MultilocusGenotypeStatistics::getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer& pmgc, vector locus_positions, set groups, int nb_perm) throw (Exception) { // extract a PolymorphismMultiGContainer with only those groups PolymorphismMultiGContainer sub_pmgc = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups); double nb_sup = 0.0; double nb_inf = 0.0; PermResults results; results.Statistic = getWCMultilocusFis(sub_pmgc, locus_positions, groups); if (nb_perm > 0) { for (int i = 0; i < nb_perm; i++) { PolymorphismMultiGContainer permuted_pmgc = PolymorphismMultiGContainerTools::permutIntraGroupAlleles(sub_pmgc, groups); double Fis_perm = getWCMultilocusFis(permuted_pmgc, locus_positions, groups); if (Fis_perm > results.Statistic) nb_sup++; if (Fis_perm < results.Statistic) nb_inf++; } nb_sup /= (double) nb_perm; nb_inf /= (double) nb_perm; } results.Percent_sup = nb_sup; results.Percent_inf = nb_inf; return results; } double 
MultilocusGenotypeStatistics::getRHMultilocusFst(const PolymorphismMultiGContainer& pmgc, vector locus_positions, const set& groups) throw (Exception) { double Au, Bu, Cu; double RH = 0.0; int nb_alleles = 0; int total_alleles = 0; for (size_t i = 0; i < locus_positions.size(); i++) { // reduce computation for polymorphic loci for that groups vector ids = getAllelesIdsForGroups(pmgc, i, groups); if (ids.size() >= 2) { nb_alleles = 0; // mean allelic frequencies map< size_t, double > P = MultilocusGenotypeStatistics::getAllelesFrqForGroups (pmgc, locus_positions[i], groups); // variance components from W&C map values = getVarianceComponents(pmgc, locus_positions[i], groups); for (map::iterator it = values.begin(); it != values.end(); it++) { Au = it->second.a; Bu = it->second.b; Cu = it->second.c; if ((Au + Bu + Cu) != 0) { double Pu = P[it->first]; // it->first is the allele number RH += (1 - Pu) * Au / (Au + Bu + Cu); nb_alleles++; } } total_alleles += (nb_alleles - 1); } } if (total_alleles == 0) throw ZeroDivisionException("MultilocusGenotypeStatistics::getRHMultilocusFst."); return RH / double(total_alleles); } std::auto_ptr MultilocusGenotypeStatistics::getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, vector locus_positions, const set& groups, string distance_methode) throw (Exception) { vector names = pmgc.getAllGroupsNames(); vector grp_ids_vect; for (set::iterator i = groups.begin(); i != groups.end(); i++) { grp_ids_vect.push_back(*i); } auto_ptr _dist(new DistanceMatrix(names)); for (size_t i = 0; i < groups.size(); i++) { (*_dist)(i, i) = 0; } set pairwise_grp; for (size_t j = 0; j < groups.size () - 1; j++) { for (size_t k = j + 1; k < groups.size (); k++) { double distance = 0; if (distance_methode == "nei72") distance = MultilocusGenotypeStatistics::getDnei72( pmgc, locus_positions, grp_ids_vect[j], grp_ids_vect[k] ); else if (distance_methode == "nei78") distance = MultilocusGenotypeStatistics::getDnei78( pmgc, locus_positions, 
grp_ids_vect[j], grp_ids_vect[k] ); else if (distance_methode == "WC") // Fst multilocus selon W&C { pairwise_grp.insert(grp_ids_vect[j] ); pairwise_grp.insert(grp_ids_vect[k] ); distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locus_positions, pairwise_grp); pairwise_grp.clear(); } else if (distance_methode == "RH") // Fst multilocus selon ponderation Robertson & Hill { pairwise_grp.insert(grp_ids_vect[j] ); pairwise_grp.insert(grp_ids_vect[k] ); distance = MultilocusGenotypeStatistics::getRHMultilocusFst( pmgc, locus_positions, pairwise_grp); pairwise_grp.clear(); } else if (distance_methode == "Nm") // Nm déduit des Fst multilocus selon W&C modèle en îles Fst = 1/(1+4Nm) { pairwise_grp.insert(grp_ids_vect[j] ); pairwise_grp.insert(grp_ids_vect[k] ); distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locus_positions, pairwise_grp); if (distance != 0) distance = 0.25 * (1 - distance) / distance; else distance = NAN; pairwise_grp.clear(); } else if (distance_methode == "D") // D=-ln(1-Fst) of Reynolds, Weir and Cockerham, 1983 { pairwise_grp.insert(grp_ids_vect[j] ); pairwise_grp.insert(grp_ids_vect[k] ); distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locus_positions, pairwise_grp); if (distance != 1) distance = -log(1 - distance); else distance = NAN; pairwise_grp.clear(); } else if (distance_methode == "Rousset") // Calcul de Fst/(1-Fst). Rousset F. 
1997 { pairwise_grp.insert(grp_ids_vect[j] ); pairwise_grp.insert(grp_ids_vect[k] ); distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locus_positions, pairwise_grp); if (distance != 1) distance = distance / (1 - distance); else distance = NAN; pairwise_grp.clear(); } (*_dist)(k, j) = distance; (*_dist)(j, k) = distance; } // for k } // for j return _dist; } bpp-popgen-2.1.0/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp000644 000000 000000 00000006475 12147656633 024435 0ustar00rootroot000000 000000 // // File MonoAlleleMonolocusGenotype.cpp // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "MonoAlleleMonolocusGenotype.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(size_t allele_index) : allele_index_(allele_index) {} MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(std::vector allele_index) throw (BadSizeException) : allele_index_(0) { if (allele_index.size() != 1) throw BadSizeException("MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype: allele_index must conain one value.", allele_index.size(), 1); allele_index_ = allele_index[0]; } MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(const MonoAlleleMonolocusGenotype& mmg) : allele_index_(mmg.getAlleleIndex()[0]) {} // ** Class destructor: ********************************************************/ MonoAlleleMonolocusGenotype::~MonoAlleleMonolocusGenotype() {} // ** Other methodes: **********************************************************/ MonoAlleleMonolocusGenotype& MonoAlleleMonolocusGenotype::operator=(const MonoAlleleMonolocusGenotype& mmg) { allele_index_ = mmg.getAlleleIndex()[0]; return *this; } bool MonoAlleleMonolocusGenotype::operator==(const MonoAlleleMonolocusGenotype& mmg) const { return allele_index_ == mmg.getAlleleIndex()[0]; } std::vector MonoAlleleMonolocusGenotype::getAlleleIndex() const { vector index; index.push_back(allele_index_); return index; } MonoAlleleMonolocusGenotype* MonoAlleleMonolocusGenotype::clone() const { return new MonoAlleleMonolocusGenotype(*this); } 
bpp-popgen-2.1.0/src/Bpp/PopGen/AlleleInfo.h000644 000000 000000 00000005116 12147656633 020442 0ustar00rootroot000000 000000 // // File AlleleInfo.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ALLELEINFO_H_ #define _ALLELEINFO_H_ // From STL #include #include namespace bpp { /** * @brief The AlleleInfo interface. 
* * An AlleleInfo is a data structure designed to store informations about * alleles in general like the size of the marker for example. * * @author Sylvain Gaillard */ class AlleleInfo : public virtual Clonable { public: // Destructor virtual ~AlleleInfo() {} public: // Methodes /** * @brief Set the identifier of the allele. */ virtual void setId(const std::string& allele_id) = 0; /** * @brief Get the identitier of the allele. */ virtual const std::string& getId() const = 0; /** * @name The Clonable interface * * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else AlleleInfo* #endif clone() const = 0; /** @} */ }; } // end of namespace bpp; #endif // _ALLELEINFO_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/BasicAlleleInfo.cpp000644 000000 000000 00000005315 12147656633 021740 0ustar00rootroot000000 000000 // // File BasicAlleleInfo.cpp // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "BasicAlleleInfo.h" using namespace bpp; // ** Class constructor: *******************************************************/ BasicAlleleInfo::BasicAlleleInfo(const std::string& id) : id_(id) {} BasicAlleleInfo::BasicAlleleInfo(const BasicAlleleInfo& allele) : id_(allele.getId()) {} // ** Class destructor: *******************************************************/ BasicAlleleInfo::~BasicAlleleInfo() {} // ** Other methodes: *********************************************************/ BasicAlleleInfo& BasicAlleleInfo::operator=(const BasicAlleleInfo& allele) { id_ = allele.getId(); return *this; } bool BasicAlleleInfo::operator==(const BasicAlleleInfo& allele) const { return id_ == allele.getId(); } bool BasicAlleleInfo::operator!=(const BasicAlleleInfo& allele) const { return !(id_ == allele.getId()); } void BasicAlleleInfo::setId(const std::string& allele_id) { id_ = allele_id; } const std::string& BasicAlleleInfo::getId() const { return id_; } bpp-popgen-2.1.0/src/Bpp/PopGen/AnalyzedLoci.h000644 000000 000000 00000011717 12147656633 021012 0ustar00rootroot000000 000000 // // File AnalyzedLoci.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* 
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ANALYZEDLOCI_H_ #define _ANALYZEDLOCI_H_ // From STL #include #include #include // From local #include "LocusInfo.h" #include "GeneralExceptions.h" namespace bpp { /** * @brief The AnalyzedLoci class. * * This is a LocusInfo container. * Its instanciation requires a number of locus wich is fixed * and can't be modified. 
* * @author Sylvain Gaillard */ class AnalyzedLoci { private: std::vector loci_; public: // Constructors and Destructor /** * @brief Build a void AnalyzedLoci with a specific number of loci. */ AnalyzedLoci(size_t number_of_loci); /** * @brief Copy constructor. */ AnalyzedLoci(const AnalyzedLoci& analyzed_loci); /** * @brief Destroy the AnalyzedLoci. */ ~AnalyzedLoci(); public: // Other methodes /** * @brief Set a LocusInfo. * * @throw IndexOutOfBoundsException if locus_position is out of bounds. */ void setLocusInfo(size_t locus_position, const LocusInfo& locus) throw (IndexOutOfBoundsException); /** * @brief Get the position of a LocusInfo. * * @throw BadIdentifierException if locus_name is not found. */ size_t getLocusInfoPosition(const std::string& locus_name) const throw (BadIdentifierException); /** * @brief Get a LocusInfo by name. * * @throw BadIdentifierException if locus_name is not found. */ const LocusInfo& getLocusInfoByName(const std::string& locus_name) const throw (BadIdentifierException); /** * @brief Get a LocusInfo by its position. * * @throw NullPointerException if the LocusInfo is not difined. * @throw IndexOutOfBoundsException if locus_position is out of bounds. */ const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const throw (Exception); /** * @brief Add an AlleleInfo to a LocusInfo by LocusInfo name. * * @throw BadIdentifierException if the allele's id is already in use. * @throw LocusNotFoundException if locus_name is not found. */ void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele) throw (Exception); /** * @brief Add an AlleleInfo to a LocusInfo by its position. * * @throw BadIdentifierException if the allele's id is already in use. * @throw IndexOutOfBoundsException if locus_position is out of bounds. */ void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception); /** * @brief Get the number of loci. 
*/ size_t getNumberOfLoci() const { return loci_.size(); } /** * @brief Get the number of alleles at each locus. */ std::vector getNumberOfAlleles() const; /** * @brief Get the ploidy of a locus by name. * * @throw LocusNotFoundException if locus_name is not found. */ unsigned int getPloidyByLocusName(const std::string& locus_name) const throw (LocusNotFoundException); /** * @brief Get the ploidy of a locus by its position. * * @throw IndexOutOfBoundsException if locus_position is out of bounds. */ unsigned int getPloidyByLocusPosition(size_t locus_position) const throw (IndexOutOfBoundsException); }; } // end of namespace bpp; #endif // _ANALYZEDLOCI_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/MultiSeqIndividual.h000644 000000 000000 00000025052 12147656633 022205 0ustar00rootroot000000 000000 // // File MultiSeqIndividual.h // Author : Sylvain Gaillard // Last modification : Tuesday August 03 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _MULTISEQINDIVIDUAL_H_ #define _MULTISEQINDIVIDUAL_H_ // From STL #include #include #include #include #include #include // From SeqLib #include #include #include // From PopGenLib #include "Locality.h" #include "Date.h" #include "MultilocusGenotype.h" #include "GeneralExceptions.h" namespace bpp { /** * @brief
*** UNUSED CLASS ***
The MultiSeqIndividual class. * *
*** UNUSED CLASS ***
* This class is designed to store data on a single individual. * This individual can store numerous sequences for each place. It was the * first working implementation which manages sequences as a map of sequence * container. We have replaced it with a simplest individual with only one * sequence per locus. * * @author Sylvain Gaillard */ class MultiSeqIndividual { private: std::string id_; unsigned short sex_; Date* date_; Point2D* coord_; const Locality* locality_; std::map sequences_; MultilocusGenotype* genotype_; public: // Constructors and destructor : /** * @brief Build a void new MultiSeqIndividual. */ MultiSeqIndividual(); /** * @brief Build a new MultiSeqIndividual with an identifier. */ MultiSeqIndividual(const std::string& id); /** * @brief Build a new MultiSeqIndividual with parameters. * * @param id The id of the MultiSeqIndividual as a string. * @param date The date of the MultiSeqIndividual as a Date object. * @param coord The coordinates of the MultiSeqIndividual as a Coord object. * @param locality The locality of the MultiSeqIndividual as a pointer to a Locality * object. * @param sex The sex of the MultiSeqIndividual as an unsigned short. */ MultiSeqIndividual(const std::string& id, const Date& date, const Point2D& coord, Locality* locality, const unsigned short sex); /** * @brief The MultiSeqIndividual copy constructor. */ MultiSeqIndividual(const MultiSeqIndividual& ind); /** * @brief Destroy an MultiSeqIndividual. */ virtual ~MultiSeqIndividual(); public: // Methodes /** * @brief The MultiSeqIndividual copy operator. * * @return A ref toward the assigned MultiSeqIndividual. * Make a copy of each atribute of the MultiSeqIndividual. */ MultiSeqIndividual& operator=(const MultiSeqIndividual& ind); /** * @brief Set the id of the MultiSeqIndividual. * * @param id The id of the MultiSeqIndividual as a string. */ void setId(const std::string id); /** * @brief Get the id of the MultiSeqIndividual. 
* * @return The id of the MultiSeqIndividual as a string. */ std::string getId() const; /** * @brief Set the sex of the MultiSeqIndividual. * * @param sex An unsigned short coding for the sex. */ void setSex(const unsigned short sex); /** * @brief Get the sex of the MultiSeqIndividual. * * @return The sex of the MultiSeqIndividual as an unsigned short. */ unsigned short getSex() const; /** * @brief Set the date of the MultiSeqIndividual. * * @param date The date as a Date object. */ void setDate(const Date& date); /** * @brief Get the date of the MultiSeqIndividual. * * @return A pointer toward a Date object if the MultiSeqIndividual has a date. * Otherwise throw a NullPointerException. */ const Date* getDate() const throw (NullPointerException); /** * @brief Tell if this MultiSeqIndividual has a date. */ bool hasDate() const; /** * @brief Set the coodinates of the MultiSeqIndividual. * * @param coord A Point2D object. */ void setCoord(const Point2D& coord); /** * @brief Set the coordinates of the MultiSeqIndividual. * * @param x The X coordinate as a double. * @param y The Y coordinate as a double. */ void setCoord(const double x, const double y); /** * @brief Get the coordinates of the Induvidual. * * @return A pointer toward a Point2D object if the MultiSeqIndividual has * coordinates. Otherwise throw a NullPointerException. */ const Point2D* getCoord() const throw (NullPointerException); /** * @brief Tell if this MultiSeqIndividual has coordinates. */ bool hasCoord() const; /** * @brief Set the X coordinate of the MultiSeqIndividual. * * @param x The X coordinate as a double. * * Set the X coordinate if the MultiSeqIndividual has coordinates. * Otherwise throw a NullPointerException. */ void setX(const double x) throw (NullPointerException); /** * @brief Set the Y coordinate of th MultiSeqIndividual. * * @param y The Y coordinate as a double. * * Set the Y coordinate if the MultiSeqIndividual has coordinates. * Otherwise throw a NullPointerException. 
*/ void setY(const double y) throw (NullPointerException); /** * @brief Get the X coordinate of the MultiSeqIndividual. * * @return The X coordinate as a double if the MultiSeqIndividual has coordinates. * Otherwise throw a NullPointerException. */ double getX() const throw (NullPointerException); /** * @brief Get the Y coordinate of the MultiSeqIndividual. * * @return The Y coordinate as a double if the MultiSeqIndividual has coordinates. * Otherwise throw a NullPointerException. */ double getY() const throw (NullPointerException); /** * @brief Set the locality of the MultiSeqIndividual. * * @param locality A pointer to a Locality object. */ void setLocality(const Locality* locality); /** * @brief Get the locality of the MultiSeqIndividual. * * @return A pointer to the Locality of the MultiSeqIndividual. */ const Locality* getLocality() const throw (NullPointerException); /** * @brief Tell if this MultiSeqIndividual has a locality. */ bool hasLocality() const; /** * @brief Get a pointer to the VectorSequenceContainer at a named locus. * * @param id The id of the sequence set (i.e. locus). */ const VectorSequenceContainer* getVectorSequenceContainer(const std::string& id) const throw (Exception); /** * @brief Add a sequence in a named sequence set. * * @param id The id of the sequence set. * @param sequence The sequence to add. * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet. * @throw BadIdentifierException if sequence's name is already in use. */ void addSequence(const std::string& id, const Sequence& sequence) throw (Exception); /** * @brief Get a named sequence from a named sequence set. * * @param id The id of the sequence set. * @param name The name of the sequence. * * @return A reference to the sequence. */ const Sequence& getSequence(const std::string& id, const std::string& name) const throw (Exception); /** * @brief Get an indexed sequence from a named sequence set. 
* * @param id The id of the sequence set. * @param i The index of the sequence in the sequence set. * * @return A reference to the sequence. */ const Sequence& getSequence(const std::string& id, const size_t i) const throw (Exception); /** * @brief Get the sequence set ids. * * @return All the keys of the sequence sets in a vector. */ std::vector getSequencesKeys() const; /** * @brief Remove a named sequence from a named sequence set. * * @param id The id of the sequence set. * @param name The name of the sequence. * * @return A pointer to a copy of the removed sequence. */ Sequence* removeSequence(const std::string& id, const std::string& name); /** * @brief Delete a named sequence from a named sequence set. * * @param id The id of the sequence set. * @param name The name of the sequence. */ void deleteSequence(const std::string& id, const std::string& name); /** * @brief Tell if the MultiSeqIndividual has some sequences. */ bool hasSequences() const; /** * @brief Count the number of sequece set. */ size_t getNumberOfSequenceSet() const; /** * @brief Get the number of sequences in a sequence set. */ size_t getNumberOfSequences(const std::string& id) const throw (Exception); /** * @brief Add a genotype. * * @param genotype The MultilocusGenotype to add. */ void addGenotype(const MultilocusGenotype& genotype); /** * @brief Get the genotype. */ const MultilocusGenotype* getGenotype() const throw (NullPointerException); /** * @brief Tell if the MultiSeqIndividual has a MultilocusGenotype. */ bool hasGenotype() const; }; } // end of namespace bpp; #endif // _MULTISEQINDIVIDUAL_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/Group.cpp000644 000000 000000 00000057507 12147656633 020072 0ustar00rootroot000000 000000 // // File Group.cpp // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Group.h" using namespace bpp; using namespace std; // ** Class constructors: ******************************************************/ Group::Group(size_t group_id) : id_(group_id), name_(""), individuals_(vector()) {} Group::Group(const Group& group) : id_(group.getGroupId()), name_(group.getGroupName()), // individuals_(vector(group.getNumberOfIndividuals())) individuals_(vector()) { for (size_t i = 0; i < group.getNumberOfIndividuals(); i++) { addIndividual(group.getIndividualAtPosition(i)); } } Group::Group(const Group& group, size_t group_id) : id_(group_id), name_(group.getGroupName()), individuals_(vector()) { for (size_t i = 0; i < group.getNumberOfIndividuals(); i++) { addIndividual(group.getIndividualAtPosition(i)); } } // ** Class destructor: ********************************************************/ Group::~Group () {} // ** Other methodes: **********************************************************/ Group& Group::operator=(const Group& group) { setGroupId(group.getGroupId()); for (size_t i = 0; i < group.getNumberOfIndividuals(); i++) { addIndividual(group.getIndividualAtPosition(i)); } return *this; } void Group::setGroupId(size_t group_id) { id_ = group_id; } void Group::setGroupName(const std::string& group_name) { name_ = group_name; } void Group::addIndividual(const Individual& ind) throw (BadIdentifierException) { try { getIndividualPosition(ind.getId()); throw BadIdentifierException("Group::addIndividual: individual id already used.", ind.getId()); } catch (BadIdentifierException& bie) {} individuals_.push_back(new Individual(ind)); } void Group::addEmptyIndividual(const std::string& individual_id) throw (BadIdentifierException) { for (size_t i = 0; i < getNumberOfIndividuals(); i++) { if (individuals_[i]->getId() == individual_id) throw BadIdentifierException("Group::addEmptyIndividual: individual_id already in use.", individual_id); } individuals_.push_back(new Individual(individual_id)); } size_t Group::getIndividualPosition(const 
std::string& individual_id) const throw (IndividualNotFoundException) { for (size_t i = 0; i < getNumberOfIndividuals(); i++) { if (individuals_[i]->getId() == individual_id) return i; } throw IndividualNotFoundException("Group::getIndividualPosition: individual_id not found.", individual_id); } std::auto_ptr Group::removeIndividualById(const std::string& individual_id) throw (IndividualNotFoundException) { try { size_t indPos = getIndividualPosition(individual_id); auto_ptr ind(individuals_[indPos]); individuals_.erase(individuals_.begin() + indPos); return ind; } catch (IndividualNotFoundException& infe) { throw IndividualNotFoundException("Group::removeIndividualById: individual_id not found.", individual_id); } } std::auto_ptr Group::removeIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException) { if (individual_position >= individuals_.size()) throw IndexOutOfBoundsException("Group::removeIndividualAtPosition.", individual_position, 0, individuals_.size()); auto_ptr ind(individuals_[individual_position]); individuals_.erase(individuals_.begin() + individual_position); return ind; } void Group::deleteIndividualById(const std::string& individual_id) throw (IndividualNotFoundException) { try { removeIndividualById(individual_id); } catch (IndividualNotFoundException& infe) { throw IndividualNotFoundException("Group::deleteIndividualById: individual_id not found.", individual_id); } } void Group::deleteIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException) { try { removeIndividualAtPosition(individual_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Group::deleteIndividualAtPosition.", individual_position, 0, getNumberOfIndividuals()); } } void Group::clear() { for (size_t i = 0; i < individuals_.size(); i++) { delete (individuals_[i]); } individuals_.clear(); } const Individual& Group::getIndividualById(const std::string& individual_id) const throw 
(IndividualNotFoundException) { for (size_t i = 0; i < individuals_.size(); i++) { if (individuals_[i]->getId() == individual_id) return getIndividualAtPosition(i); } throw IndividualNotFoundException("Group::getIndividualById: individual_id not found.", individual_id); } const Individual& Group::getIndividualAtPosition(size_t individual_position) const throw (IndexOutOfBoundsException) { if (individual_position >= individuals_.size()) throw IndexOutOfBoundsException("Group::getIndividualAtPosition: individual_position out of bounds.", individual_position, 0, individuals_.size()); return *individuals_[individual_position]; } size_t Group::getNumberOfIndividuals() const { return individuals_.size(); } size_t Group::getMaxNumberOfSequences() const { size_t maxnum = 0; for (size_t i = 0; i < getNumberOfIndividuals(); i++) { vector seqpos = individuals_[i]->getSequencesPositions(); for (size_t j = 0; j < seqpos.size(); j++) { if (maxnum < seqpos[j]) maxnum = seqpos[j]; } } return maxnum + 1; } // -- Dealing with individual's properties ----------------- void Group::setIndividualSexAtPosition(size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualSexAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->setSex(sex); } unsigned short Group::getIndividualSexAtPosition(size_t individual_position) const throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualSexAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); return individuals_[individual_position]->getSex(); } void Group::setIndividualDateAtPosition(size_t individual_position, const Date& date) throw (IndexOutOfBoundsException) { if (individual_position >= 
getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualDateAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->setDate(date); } const Date& Group::getIndividualDateAtPosition(size_t individual_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualDateAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getDate(); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualDateAtPosition: individual has no date."); } } void Group::setIndividualCoordAtPosition(size_t individual_position, const Point2D& coord) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualCoordAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->setCoord(coord); } const Point2D& Group::getIndividualCoordAtPosition(size_t individual_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualCoordAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getCoord(); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualCoordAtPosition: individual has no coordinates."); } } void Group::setIndividualLocalityAtPosition(size_t individual_position, const Locality* locality) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualLocalityAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); 
individuals_[individual_position]->setLocality(locality); } const Locality& Group::getIndividualLocalityAtPosition(size_t individual_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualLocalityAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return *individuals_[individual_position]->getLocality(); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualLocalityAtPosition: individuals has no locality."); } } void Group::addIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::addIndividualSequenceAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->addSequence(sequence_position, sequence); } catch (AlphabetMismatchException& ame) { throw AlphabetMismatchException("Group::addIndividualSequenceAtPosition: sequence's alphabet doesn't match.", ame.getAlphabets()[0], ame.getAlphabets()[1]); } catch (BadIdentifierException& bie) { throw BadIdentifierException("Group::addIndividualSequenceAtPosition: sequence's name already in use.", bie.getIdentifier()); } catch (BadIntegerException& bie) { throw BadIntegerException("Group::addIndividualSequenceAtPosition: sequence_position already in use.", bie.getBadInteger()); } } const Sequence& Group::getIndividualSequenceByName(size_t individual_position, const string& sequence_name) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualSequenceByName: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getSequenceByName(sequence_name); } catch 
(NullPointerException& npe) { throw NullPointerException("Group::getIndividualSequenceByName: no sequence data in individual."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Group::getIndividualSequenceByName: sequence_name not found.", snfe.getSequenceId()); } } const Sequence& Group::getIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getSequenceAtPosition(sequence_position); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualSequenceAtPosition: no sequence data in individual."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Group::getIndividualSequenceAtPosition: sequence_position not found.", snfe.getSequenceId()); } } void Group::deleteIndividualSequenceByName(size_t individual_position, const string& sequence_name) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::deleteIndividualSequenceByName: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->deleteSequenceByName(sequence_name); } catch (NullPointerException& npe) { throw NullPointerException("Group::deleteSequenceByName: no sequence data in individual."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Group::deleteSequenceByName: sequence_name not found.", snfe.getSequenceId()); } } void Group::deleteIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::deleteIndividualSequenceAtPosition: 
individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->deleteSequenceAtPosition(sequence_position); } catch (NullPointerException& npe) { throw NullPointerException("Group::deleteSequenceAtPosition: no sequence data in individual."); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Group::deleteSequenceAtPosition: sequence_position not found.", snfe.getSequenceId()); } } bool Group::hasIndividualSequences(size_t individual_position) const throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::hasIndividualSequences: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); return individuals_[individual_position]->hasSequences(); } vector Group::getIndividualSequencesNames(size_t individual_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualSequencesNames: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getSequencesNames(); } catch (NullPointerException& npe) { throw NullPointerException("Group::getSequencesNames: no sequence data in individual."); } } size_t Group::getIndividualSequencePosition(size_t individual_position, const string& sequence_name) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualSequencePosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getSequencePosition(sequence_name); } catch (NullPointerException& npe) { throw NullPointerException("Group::getSequencePosition: no sequence data in individual."); } catch (SequenceNotFoundException& snfe) { throw 
SequenceNotFoundException("Group::getSequencePosition: sequence_name not found.", snfe.getSequenceId()); } } size_t Group::getIndividualNumberOfSequences(size_t individual_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualNumberOfSequences: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getNumberOfSequences(); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualNumberOfSequences: no sequence data in individual."); } } void Group::setIndividualSequences(size_t individual_position, const MapSequenceContainer& msc) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualSequences: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->setSequences(msc); } void Group::setIndividualGenotype(size_t individual_position, const MultilocusGenotype& genotype) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualGenotype: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->setGenotype(genotype); } void Group::initIndividualGenotype(size_t individual_position, size_t loci_number) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::initIndividualGenotype: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->initGenotype(loci_number); } catch (BadIntegerException& bie) { throw BadIntegerException("Group::initIndividualGenotype: loci_number must be > 0.", bie.getBadInteger()); } catch (Exception) { throw 
Exception("Group::initIndividualGenotype: individual already has a genotype."); } } void Group::deleteIndividualGenotype(size_t individual_position) throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::deleteIndividualGenotype: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); individuals_[individual_position]->deleteGenotype(); } bool Group::hasIndividualGenotype(size_t individual_position) const throw (IndexOutOfBoundsException) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::hasIndividualGenotype: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); return individuals_[individual_position]->hasGenotype(); } void Group::setIndividualMonolocusGenotype(size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotype: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->setMonolocusGenotype(locus_position, monogen); } catch (NullPointerException& npe) { throw NullPointerException("Group::setIndividualMonolocusGenotype: individual has no genotype."); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotype: locus_position excedes the number of locus.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } void Group::setIndividualMonolocusGenotypeByAlleleKey(size_t individual_position, size_t locus_position, const std::vector& allele_keys) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleKey: individual_position out of bounds.", individual_position, 0, 
getNumberOfIndividuals()); try { individuals_[individual_position]->setMonolocusGenotypeByAlleleKey(locus_position, allele_keys); } catch (NullPointerException& npe) { throw NullPointerException("Group::setIndividualMonolocusGenotypeByAlleleKey: individual has no genotype."); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleKey: locus_position excedes the number of locus.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (Exception) { throw Exception("Group::setIndividualMonolocusGenotypeByAlleleKey: no key in allele_keys."); } } void Group::setIndividualMonolocusGenotypeByAlleleId(size_t individual_position, size_t locus_position, const std::vector& allele_id, const LocusInfo& locus_info) throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleId: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals()); try { individuals_[individual_position]->setMonolocusGenotypeByAlleleId(locus_position, allele_id, locus_info); } catch (NullPointerException& npe) { throw NullPointerException("Group::setIndividualMonolocusGenotypeByAlleleId: individual has no genotype."); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleId: locus_position excedes the number of locus.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (AlleleNotFoundException& anfe) { throw AlleleNotFoundException("Group::setIndividualMonolocusGenotypeByAlleleId: id not found.", anfe.getIdentifier()); } } const MonolocusGenotype& Group::getIndividualMonolocusGenotype(size_t individual_position, size_t locus_position) const throw (Exception) { if (individual_position >= getNumberOfIndividuals()) throw IndexOutOfBoundsException("Group::getIndividualMonolocusGenotype: individual_position out of 
bounds.", individual_position, 0, getNumberOfIndividuals()); try { return individuals_[individual_position]->getMonolocusGenotype(locus_position); } catch (NullPointerException& npe) { throw NullPointerException("Group::getIndividualMonolocusGenotype: individual has no genotype."); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Group::getIndividualMonolocusGenotype: locus_position excedes the number of locus.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } bool Group::hasSequenceData() const { for (size_t i = 0; i < getNumberOfIndividuals(); i++) { if (hasIndividualSequences(i)) return true; } return false; } const Alphabet* Group::getAlphabet() const throw (NullPointerException) { for (size_t i = 0; i < getNumberOfIndividuals(); i++) { if (hasIndividualSequences(i)) return individuals_[i]->getSequenceAlphabet(); } throw NullPointerException("Group::getAlphabet: individual has no sequence data."); } size_t Group::getGroupSizeForLocus(size_t locus_position) const { size_t count = 0; for (size_t i = 0; i < individuals_.size(); i++) { if (individuals_[i]->hasGenotype() && !individuals_[i]->getGenotype().isMonolocusGenotypeMissing(locus_position)) count++; } return count; } size_t Group::getGroupSizeForSequence(size_t sequence_position) const { size_t count = 0; for (size_t i = 0; i < individuals_.size(); i++) { if (individuals_[i]->hasSequences()) { try { individuals_[i]->getSequenceAtPosition(sequence_position); count++; } catch (...) {} } } return count; } bpp-popgen-2.1.0/src/Bpp/PopGen/MonolocusGenotype.h000644 000000 000000 00000005423 12147656633 022122 0ustar00rootroot000000 000000 // // File MonolocusGenotype.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _MONOLOCUSGENOTYPE_H_ #define _MONOLOCUSGENOTYPE_H_ // From STL #include #include namespace bpp { /** * @brief The MonolocusGenotype virtual class. * * A MonolocusGenotype containes the Alleles' keys defined in a Locus object. * This keys are returned as size_tegers. * This class is an interface for all monolocus genotypes. * * @author Sylvain Gaillard */ class MonolocusGenotype : public Clonable { public: // Constructors and Destructor /** * @brief Destroy a MonolocusGenotype. */ virtual ~MonolocusGenotype() {} public: // Methodes /** * @brief Get the alleles' index. 
* * The alleles' index are the position of the AlleleInfo in a LocusInfo object. * If no LocusInfo is used, the index are just numbers to identify the alleles. * * @return A vector of size_t. * * The size of the vector corresponds to the number of alleles at this locus. */ virtual std::vector getAlleleIndex() const = 0; }; } // end of namespace bpp; #endif // _MONOLOCUSGENOTYPE_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp000644 000000 000000 00000007274 12147656633 024615 0ustar00rootroot000000 000000 // // File MultiAlleleMonolocusGenotype.cpp // Author : Sylvain Gaillard // Last modification : Wednesday March 5 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "MultiAlleleMonolocusGenotype.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ MultiAlleleMonolocusGenotype::MultiAlleleMonolocusGenotype(std::vector allele_index) : allele_index_(vector(allele_index.size())) { for (size_t i = 0; i < allele_index.size(); ++i) { allele_index_[i] = allele_index[i]; } } MultiAlleleMonolocusGenotype::MultiAlleleMonolocusGenotype(const MultiAlleleMonolocusGenotype& mmg) : allele_index_(vector(mmg.allele_index_.size())) { for (size_t i = 0; i < mmg.getAlleleIndex().size(); ++i) { allele_index_[i] = mmg.getAlleleIndex()[i]; } } // ** Class destructor: ********************************************************/ MultiAlleleMonolocusGenotype::~MultiAlleleMonolocusGenotype() { allele_index_.clear(); } // ** Other methodes: **********************************************************/ MultiAlleleMonolocusGenotype& MultiAlleleMonolocusGenotype::operator=(const MultiAlleleMonolocusGenotype& mmg) { for (size_t i = 0; i < mmg.getAlleleIndex().size(); ++i) { allele_index_.push_back(mmg.getAlleleIndex()[i]); } return *this; } bool MultiAlleleMonolocusGenotype::operator==(const MultiAlleleMonolocusGenotype& mmg) const { return (allele_index_[0] == mmg.getAlleleIndex()[0] && allele_index_[1] == mmg.getAlleleIndex()[1]) || (allele_index_[0] == mmg.getAlleleIndex()[1] && allele_index_[1] == mmg.getAlleleIndex()[0]); } bool MultiAlleleMonolocusGenotype::isHomozygous() const { for (size_t i = 1; i < allele_index_.size(); ++i) { if (allele_index_[i - 1] != allele_index_[i]) 
return false; } return true; } std::vector MultiAlleleMonolocusGenotype::getAlleleIndex() const { return allele_index_; } MultiAlleleMonolocusGenotype* MultiAlleleMonolocusGenotype::clone() const { return new MultiAlleleMonolocusGenotype(*this); } bpp-popgen-2.1.0/src/Bpp/PopGen/IODataSet.h000644 000000 000000 00000004561 12147656633 020210 0ustar00rootroot000000 000000 // // File IODataSet.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. 
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ // Secured inclusion of header's file #ifndef _IODATASET_H_ #define _IODATASET_H_ #include "DataSet.h" #include // From STL #include #include namespace bpp { /** * @brief Interface for input/ouput with DataSet. * * IODataSet is a virtual class. * This is an interface to declare commune methodes for in/out action on DataSet. * * @author Sylvain Gaillard */ class IODataSet : public virtual IOFormat { /** * @name The IOFormat interface. * @{ */ const std::string getDataType() const { return "DataSet for population genetics"; } /** * @} */ }; } // end of namespace bpp; #endif // _IODATASET_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/Date.h000644 000000 000000 00000011415 12147656633 017304 0ustar00rootroot000000 000000 // // File Date.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _DATE_H_ #define _DATE_H_ #include #include namespace bpp { /** * @brief The Date class * * This is a little class to deal with dates. * * @author Sylvain Gaillard */ class Date : public Clonable { private: int day_; int month_; int year_; public: // Constructors and destructor /** * @brief Build a new Date from three values. * * Build a new Date from three integers. * The default Date is set to 01-01-2000. * * @param day The day between 1 and 31. * @param month The month between 1 and 12. * @param year The year as a signed int. */ Date(const int day = 1, const int month = 1, const int year = 2000) throw (BadIntegerException); /** * @brief The Date copy constructor. */ Date(const Date& date); /** * @brief Destroy the Date object. */ ~Date(); public: // Methodes /** * @brief The Date copy operator. * * @return A ref toward the assigned Date. */ Date& operator=(const Date& date); /** * @brief Set the Date. * * @param day The day as an integer between 1 and 31. * @param month The month as an integer between 1 and 12. * @param year The year as an integer. */ void setDate(const int day, const int month, const int year) throw (BadIntegerException); /** * @brief Set the year. 
* * @param year The year as an integer. */ void setYear(const int year); /** * @brief Set the month. * * @param month The month as an integer between 1 and 12. */ void setMonth(const int month) throw (BadIntegerException); /** * @brief Set the day. * * @param day The day as an integer between 1 and 31. */ void setDay(const int day) throw (BadIntegerException); /** * @brief Get the Date as a string. * * @return The date as a string DDMMYYYY (i.e. January 1 2000 : 01012000). */ std::string getDateStr() const; /** * @brief Get the Year as an int. */ int getYear() const { return year_; } /** * @brief Get the month as an int. */ int getMonth() const { return month_; } /** * @brief Get the day as an int. */ int getDay() const { return day_; } /** * @brief The == operator. * * Test the numerical equality between to dates. */ bool operator==(const Date& date) const; /** * @brief The < operator. * * Return true if the left Date is minor than the right Date. */ bool operator<(const Date& date) const; /** * @brief The != operator. */ bool operator!=(const Date& date) const { return !(*this == date); } /** * @brief The > operator. */ bool operator>(const Date& date) const { return date < *this; } /** * @brief The <= operator. */ bool operator<=(const Date& date) const { return !(date < *this); } /** * @brief The >= operator. */ bool operator>=(const Date& date) const { return !(*this < date); } /** * @name The Clonable interface * @{ */ #ifdef NO_VIRTUAL_COV Clonable* #else Date* #endif clone() const { return new Date(*this); } }; } // end of namespace bpp; #endif // _DATE_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/Individual.cpp000644 000000 000000 00000045113 12147656633 021054 0ustar00rootroot000000 000000 // // File Individual.cpp // Author : Sylvain Gaillard // Last modification : Tuesday August 03 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "Individual.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ Individual::Individual() : id_(""), sex_(0), date_(0), coord_(0), locality_(0), sequences_(0), genotype_(0) {} Individual::Individual(const std::string& id) : id_(id), sex_(0), date_(0), coord_(0), locality_(0), sequences_(0), genotype_(0) {} Individual::Individual(const string& id, const Date& date, const Point2D& coord, Locality* locality, const unsigned short sex) : id_(id), sex_(sex), date_(new Date(date)), coord_(new Point2D(coord)), locality_(locality), sequences_(0), genotype_(0) {} Individual::Individual(const Individual& ind) : id_(ind.getId()), sex_(ind.getSex()), date_(0), coord_(0), locality_(0), sequences_(0), genotype_(0) { try { setDate(ind.getDate()); } catch (...) {} try { setCoord(ind.getCoord()); } catch (...) {} try { setLocality(ind.getLocality()); } catch (...) {} try { setSequences(dynamic_cast(ind.getSequences())); } catch (...) {} if (ind.hasGenotype()) genotype_.reset(new MultilocusGenotype(ind.getGenotype())); } // ** Class destructor: *******************************************************/ Individual::~Individual () {} // ** Other methodes: *********************************************************/ Individual& Individual::operator=(const Individual& ind) { setId(ind.getId()); setSex(ind.getSex()); try { setDate(ind.getDate()); } catch (NullPointerException) { date_.reset(); } try { setCoord(ind.getCoord()); } catch (NullPointerException) { coord_.reset(); } try { setLocality(ind.getLocality()); } catch (NullPointerException) { locality_ = 0; } try { setSequences(dynamic_cast(ind.getSequences())); } catch (NullPointerException) { sequences_.reset(); } genotype_.reset(ind.hasGenotype() ? 
new MultilocusGenotype(ind.getGenotype()) : 0); return *this; } /******************************************************************************/ // Id void Individual::setId(const std::string& id) { id_ = id; } /******************************************************************************/ // Sex void Individual::setSex(const unsigned short sex) { sex_ = sex; } /******************************************************************************/ // Date void Individual::setDate(const Date& date) { date_.reset(new Date(date)); } /******************************************************************************/ const Date& Individual::getDate() const throw (NullPointerException) { if (hasDate()) return *date_.get(); else throw (NullPointerException("Individual::getDate: no date associated to this individual.")); } /******************************************************************************/ bool Individual::hasDate() const { return date_.get() != 0; } /******************************************************************************/ // Coord void Individual::setCoord(const Point2D& coord) { coord_.reset(new Point2D(coord)); } /******************************************************************************/ void Individual::setCoord(const double x, const double y) { coord_.reset(new Point2D(x, y)); } /******************************************************************************/ const Point2D& Individual::getCoord() const throw (NullPointerException) { if (hasCoord()) return *coord_.get(); else throw (NullPointerException("Individual::getCoord: no coord associated to this individual.")); } /******************************************************************************/ bool Individual::hasCoord() const { return coord_.get() != 0; } /******************************************************************************/ void Individual::setX(const double x) throw (NullPointerException) { if (hasCoord()) coord_->setX(x); else throw (NullPointerException("Individual::setX: no coord 
associated to this individual.")); } /******************************************************************************/ void Individual::setY(const double y) throw (NullPointerException) { if (hasCoord()) coord_->setY(y); else throw (NullPointerException("Individual::setY: no coord associated to this individual.")); } /******************************************************************************/ double Individual::getX() const throw (NullPointerException) { if (hasCoord()) return coord_->getX(); else throw (NullPointerException("Individual::getX: no coord associated to this individual.")); } /******************************************************************************/ double Individual::getY() const throw (NullPointerException) { if (hasCoord()) return coord_->getY(); else throw (NullPointerException("Individual::getY: no coord associated to this individual.")); } /******************************************************************************/ // Locality void Individual::setLocality(const Locality* locality) { locality_ = locality; } /******************************************************************************/ const Locality* Individual::getLocality() const throw (NullPointerException) { if (hasLocality()) return locality_; else throw (NullPointerException("Individual::getLocality: no locality associated to this individual.")); } /******************************************************************************/ bool Individual::hasLocality() const { return locality_ != 0; } /******************************************************************************/ // Sequences void Individual::addSequence(size_t sequence_key, const Sequence& sequence) throw (Exception) { if (sequences_.get() == 0) sequences_.reset(new MapSequenceContainer(sequence.getAlphabet())); try { sequences_->addSequence(TextTools::toString(sequence_key), sequence); } catch (AlphabetMismatchException& ame) { throw (AlphabetMismatchException("Individual::addSequence: alphabets don't match.", 
ame.getAlphabets()[0], ame.getAlphabets()[1])); } catch (Exception& e) { if (string(e.what()).find("name") < string(e.what()).size()) throw (BadIdentifierException("Individual::addSequence: sequence's name already in use.", sequence.getName())); // if (string(e.what()).find("key") < string(e.what()).size()) else throw (Exception("Individual::addSequence: sequence_key already in use:" + TextTools::toString(sequence_key))); } } /******************************************************************************/ const Sequence& Individual::getSequenceByName(const std::string& sequence_name) const throw (Exception) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequenceByName: no sequence data."); try { return sequences_->getSequence(sequence_name); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Individual::getSequenceByName: sequence_name not found.", snfe.getSequenceId()); } } /******************************************************************************/ const Sequence& Individual::getSequenceAtPosition(size_t sequence_position) const throw (Exception) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequenceAtPosition: no sequence data."); try { return sequences_->getSequenceByKey(TextTools::toString(sequence_position)); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Individual::getSequenceAtPosition: sequence_position not found", snfe.getSequenceId()); } } /******************************************************************************/ void Individual::deleteSequenceByName(const std::string& sequence_name) throw (Exception) { if (sequences_.get() == 0) throw NullPointerException("Individual::deleteSequenceByName: no sequence data."); try { sequences_->deleteSequence(sequence_name); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Individual::deleteSequenceByName: sequence_name not found.", snfe.getSequenceId()); } } 
/******************************************************************************/ void Individual::deleteSequenceAtPosition(size_t sequence_position) throw (Exception) { if (sequences_.get() == 0) throw NullPointerException("Individual::deleteSequenceAtPosition: no sequence data."); try { sequences_->deleteSequenceByKey(TextTools::toString(sequence_position)); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Individual::deleteSequenceAtPosition: sequence_position not found.", snfe.getSequenceId()); } } /******************************************************************************/ std::vector Individual::getSequencesNames() const throw (NullPointerException) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequencesNames: no sequence data."); return sequences_->getSequencesNames(); } /******************************************************************************/ std::vector Individual::getSequencesPositions() const throw (NullPointerException) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequencesPositions: no sequence data."); vector seqpos; vector seqkeys = sequences_->getKeys(); for (size_t i = 0; i < seqkeys.size(); i++) { seqpos.push_back((size_t) TextTools::toInt(seqkeys[i])); } return seqpos; } /******************************************************************************/ size_t Individual::getSequencePosition(const std::string& sequence_name) const throw (Exception) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequencePosition: no sequence data."); try { return (size_t) TextTools::toInt(sequences_->getKey(getSequencePosition(sequence_name))); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("Individual::getSequencePosition: sequence_name not found.", snfe.getSequenceId()); } } /******************************************************************************/ bool Individual::hasSequences() const { return 
!(getNumberOfSequences() == 0); } /******************************************************************************/ bool Individual::hasSequenceAtPosition(size_t position) const { if (hasSequences()) { vector pos = getSequencesPositions(); for (size_t i = 0; i < pos.size(); i++) { if (pos[i] == position) return true; } } return false; } /******************************************************************************/ const Alphabet* Individual::getSequenceAlphabet() const throw (NullPointerException) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequenceAlphabet: no sequence data."); return sequences_->getAlphabet(); } /******************************************************************************/ size_t Individual::getNumberOfSequences() const { if (sequences_.get() == 0) return 0; return sequences_->getNumberOfSequences(); } /******************************************************************************/ void Individual::setSequences(const MapSequenceContainer& msc) { sequences_.reset(new MapSequenceContainer(msc)); } /******************************************************************************/ const OrderedSequenceContainer& Individual::getSequences() const throw (NullPointerException) { if (sequences_.get() == 0) throw NullPointerException("Individual::getSequences: no sequence data."); return *sequences_; } /******************************************************************************/ // MultilocusGenotype void Individual::setGenotype(const MultilocusGenotype& genotype) { genotype_.reset(new MultilocusGenotype(genotype)); } /******************************************************************************/ void Individual::initGenotype(size_t loci_number) throw (Exception) { if (hasGenotype()) throw Exception("Individual::initGenotype: individual already has a genotype."); try { genotype_.reset(new MultilocusGenotype(loci_number)); } catch (BadIntegerException& bie) { throw BadIntegerException("Individual::initGenotype: loci_number 
must be > 0.", bie.getBadInteger()); } } /******************************************************************************/ const MultilocusGenotype& Individual::getGenotype() const throw (NullPointerException) { if (!hasGenotype()) throw NullPointerException("Individual::getGenotype: individual has no genotype."); return *genotype_; } /******************************************************************************/ void Individual::deleteGenotype() { genotype_.reset(); } /******************************************************************************/ bool Individual::hasGenotype() const { return genotype_.get() != 0; } /******************************************************************************/ void Individual::setMonolocusGenotype(size_t locus_position, const MonolocusGenotype& monogen) throw (Exception) { if (!hasGenotype()) throw NullPointerException("Individual::setMonolocusGenotype: individual has no genotype."); try { genotype_->setMonolocusGenotype(locus_position, monogen); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Individual::setMonolocusGenotype: locus_position out of boubds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ void Individual::setMonolocusGenotypeByAlleleKey(size_t locus_position, const std::vector allele_keys) throw (Exception) { if (!hasGenotype()) throw NullPointerException("Individual::setMonolocusGenotypeByAlleleKey: individual has no genotype."); try { genotype_->setMonolocusGenotypeByAlleleKey(locus_position, allele_keys); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Individual::setMonolocusGenotypeByAlleleKey: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (Exception) { throw Exception("Individual::setMonolocusGenotypeByAlleleKey: no key in allele_keys."); } } 
/******************************************************************************/ void Individual::setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector allele_id, const LocusInfo& locus_info) throw (Exception) { if (!hasGenotype()) throw NullPointerException("Individual::setMonolocusGenotypeByAlleleId: individual has no genotype."); try { genotype_->setMonolocusGenotypeByAlleleId(locus_position, allele_id, locus_info); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Individual::setMonolocusGenotypeByAlleleId: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } catch (AlleleNotFoundException& anfe) { throw AlleleNotFoundException("Individual::setMonolocusGenotypeByAlleleId: id not found.", anfe.getIdentifier()); } } /******************************************************************************/ const MonolocusGenotype& Individual::getMonolocusGenotype(size_t locus_position) throw (Exception) { if (!hasGenotype()) throw NullPointerException("Individual::getMonolocusGenotype: individual has no genotype."); try { return genotype_->getMonolocusGenotype(locus_position); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("Individual::getMonolocusGenotype: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]); } } /******************************************************************************/ size_t Individual::countNonMissingLoci() const throw (NullPointerException) { if (!hasGenotype()) throw NullPointerException("Individual::countNonMissingLoci: individual has no genotype."); return genotype_->countNonMissingLoci(); } /******************************************************************************/ size_t Individual::countHomozygousLoci() const throw (NullPointerException) { if (!hasGenotype()) throw NullPointerException("Individual::countHomozygousLoci: individual has no genotype."); return 
genotype_->countHomozygousLoci(); } /******************************************************************************/ size_t Individual::countHeterozygousLoci() const throw (NullPointerException) { if (!hasGenotype()) throw NullPointerException("Individual::countHeterozygousLoci: individual has no genotype."); return genotype_->countHeterozygousLoci(); } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/DarwinDon.h000644 000000 000000 00000005354 12147656633 020321 0ustar00rootroot000000 000000 // // File DarwinDon.h // Author : Sylvain Gaillard // Last modification : April 7, 2008 // /* Copyright or © or Copr. CNRS, (April 7, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _DARWIN_DON_H_ #define _DARWIN_DON_H_ /* NOTE(review): the four #include directives below have lost their header names in this copy of the file; restore them from the upstream DarwinDon.h. */ #include #include #include #include // From local Pop #include "AbstractODataSet.h" namespace bpp { /** * @brief The Darwin .don output format for popgenlib. * * This class only declares write methods (output format); it declares no read method. * * @author Sylvain Gaillard */ class DarwinDon : public virtual AbstractODataSet { public: // Constructor and destructor DarwinDon(); ~DarwinDon(); public: /** * @name The ODataSet interface. * * Two overloads: one takes an output stream, the other a file path and an overwrite flag. * @{ */ void write(std::ostream& os, const DataSet& data_set) const throw (Exception); void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception); /** * @} */ /** * @name The IOFormat interface * @{ */ const std::string getFormatName() const { return "Darwin .don"; } const std::string getFormatDescription() const { return "Darwin .don file store data identifying individuals."; } /** * @} */ }; } // end of namespace bpp; #endif // _DARWIN_DON_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/GeneMapperCsvExport.h000644 000000 000000 00000013011 12147656633 022322 0ustar00rootroot000000 000000 // // File: GeneMapperCsvExport.h // Author: Sylvain Gaillard // Created: April 2, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (April 2, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_ #define _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_ /* NOTE(review): the five #include directives below have lost their header names in this copy of the file; restore them from the upstream GeneMapperCsvExport.h. */ #include #include #include #include #include // From local Pop #include "AbstractIDataSet.h" #include "BasicAlleleInfo.h" #include "MultiAlleleMonolocusGenotype.h" namespace bpp { /** * @brief The GeneMapperCsvExport input format for popgenlib. * * This input format takes a csv file exported from GeneMapper® (Applied Biosystems). 
* * @author Sylvain Gaillard */ class GeneMapperCsvExport : public AbstractIDataSet { public: static const std::string SAMPLE_FILE_H; static const std::string SAMPLE_NAME_H; static const std::string PANEL_H; static const std::string MARKER_H; static const std::string DYE_H; static const std::string ALLELE_H; static const std::string SIZE_H; static const std::string HEIGHT_H; static const std::string PEAK_AREA_H; static const std::string DAC_H; static const std::string AN_H; private: bool IndependentAlleles_; public: // Constructor and destructor GeneMapperCsvExport(bool ia = false); ~GeneMapperCsvExport(); // public: /** * @brief Set whether alleles are considered as independent markers. * */ // SetAllelsAsIndependent(bool flag); public: /** * @name The IDataSet interface. * @{ */ void read(std::istream& is, DataSet& data_set) throw (Exception); void read(const std::string& path, DataSet& data_set) throw (Exception); DataSet* read(std::istream& is) throw (Exception); DataSet* read(const std::string& path) throw (Exception); /** * @} */ /** * @name The IOFormat interface * @{ */ virtual const std::string getFormatName() const { return "GeneMapper® cvs export"; } virtual const std::string getFormatDescription() const { return "GeneMapper® is a flexible genotyping software package that provides DNA sizing and quality allele calls for all Applied Biosystems electrophoresis-based genotyping systems."; } /** * @} */ /** * @brief Store data for one allele * * Holds a name, a size, a height and a peak area; all four are set by the constructor and exposed read-only. */ class Allele { private: std::string name_; double size_; unsigned int height_; double peakArea_; public: Allele(const std::string& name, double size, unsigned int height, double peakArea) : name_(name), size_(size), height_(height), peakArea_(peakArea) {} const std::string& getName() const { return name_; } const double& getSize() const { return size_; } const unsigned int& getHeight() const { return height_; } const double& getPeakArea() const { return peakArea_; } }; /** * @brief Store one line of the GeneMapper file */ 
class Record { private: std::string sampleFile_; std::string sampleName_; std::string panel_; std::string markerName_; std::string dye_; std::vector< GeneMapperCsvExport::Allele > alleles_; /* NOTE(review): dac_ and an_ have no accessor in this class. */ std::string dac_; double an_; public: /** * @brief Constructor * * @param row One row of the file as a std::string */ Record(const std::string& row); const std::string& getSampleFileName() const { return sampleFile_; } const std::string& getSampleName() const { return sampleName_; } const std::string& getPanel() const { return panel_; } const std::string& getMarkerName() const { return markerName_; } const std::string& getDye() const { return dye_; } const size_t getNumberOfAllele() const { return alleles_.size(); } /* No bounds check: allelePos is used directly as an index into alleles_. */ const GeneMapperCsvExport::Allele& getAllele(size_t allelePos) const { return alleles_[allelePos]; } }; }; } // end of namespace bpp; #endif // _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/PopgenlibIO.h000644 000000 000000 00000013455 12147656633 020604 0ustar00rootroot000000 000000 // // File PopgenlibIO.h // Created by: Sylvain Gaillard // Created on: Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _POPULIBIO_H_ #define _POPULIBIO_H_ /* NOTE(review): the #include directives without a header name below lost their targets in this copy of the file; restore them from the upstream PopgenlibIO.h. */ #include #include #include // From Seq #include #include // From local Pop #include "AbstractIDataSet.h" #include "AbstractODataSet.h" #include "BasicAlleleInfo.h" namespace bpp { /** * @brief The native I/O format for popgenlib. 
* * @author Sylvain Gaillard */ class PopgenlibIO : public AbstractIDataSet, public AbstractODataSet { public: // Constants static const std::string WHITESPACE; static const std::string TAB; static const std::string COMA; static const std::string SEMICOLON; static const std::string DIPLOID; static const std::string HAPLOID; static const std::string HAPLODIPLOID; static const std::string UNKNOWN; private: char data_separator_; char missing_data_symbol_; /* NOTE(review): the std::vector element types in the private helpers below were stripped from this copy; restore them from upstream. */ std::vector getValues_(std::string& param_line, const std::string& delim); void parseGeneral_(const std::vector& in, DataSet& data_set); void parseLocality_(const std::vector& in, DataSet& data_set); void parseSequence_(const std::vector& in, VectorSequenceContainer& vsc); void parseLoci_(const std::vector& in, std::vector& locus_info); void parseIndividual_(const std::vector& in, DataSet& data_set, const VectorSequenceContainer& vsc); public: // Constructor and destructor PopgenlibIO(); PopgenlibIO(const std::string& missing_data_symbol, const std::string& data_separator) throw (Exception); ~PopgenlibIO(); public: /** * @brief Get the code for missing data. */ std::string getMissingDataSymbol() const; /** * @brief Get the code for data separator. */ std::string getDataSeparator() const; /** * @brief Get the character for missing data. */ char getMissingDataChar() const; /** * @brief Get the data separator char. */ char getDataSeparatorChar() const; /** * @brief Set the code for missing data. * * The character used to code missing data can be any single non-numerical * character, but it cannot be the data separator, a white space * or a tabulation. * The default value is '$'. * * @throw Exception if missing_data_symbol is not an allowed character. * @throw Exception if missing_data_symbol contains more than one character. */ void setMissingDataSymbol(const std::string& missing_data_symbol) throw (Exception); /** * @brief Set the code for data separator. 
* * The character used to separate data can be any single non-numerical * character, but it cannot be the one used for coding missing data. * Most common characters used are: * <ul>
 * <li>the white space: "WHITESPACE"</li>
 * <li>the tabulation: "TAB"</li>
 * <li>the comma: "COMA"</li>
 * <li>the semicolon: "SEMICOLON"</li>
 * </ul>
* The default value is "WHITESPACE". * * @throw Exception if data_separator is not an allowed character. * @throw Exception if data_separator contains more than one character and is not one of the codes listed above. */ void setDataSeparator(const std::string& data_separator) throw (Exception); /** * @name The IDataSet interface. * @{ */ void read(std::istream& is, DataSet& data_set) throw (Exception); void read(const std::string& path, DataSet& data_set) throw (Exception); DataSet* read(std::istream& is) throw (Exception); DataSet* read(const std::string& path) throw (Exception); /** * @} */ /** * @name The ODataSet interface. * @{ */ void write(std::ostream& os, const DataSet& data_set) const throw (Exception); void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception); /** * @} */ /** * @name The IOFormat interface * @{ */ const std::string getFormatName() const { return "PopgenlibIO ver 0.1"; } const std::string getFormatDescription() const { return "IO format used to store DataSets inspired from Arlequin and Fasta"; } /** * @} */ }; } // end of namespace bpp; #endif // _POPULIBIO_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/AnalyzedSequences.h000644 000000 000000 00000007336 12147656633 022061 0ustar00rootroot000000 000000 // // File AnalyzedSequences.h // Created by: Sylvain Gaillard // Created on: Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _ANALYZEDSEQUENCES_H_ #define _ANALYZEDSEQUENCES_H_ // From Seq (NOTE(review): the header name of the following #include was lost in this copy of the file) #include namespace bpp { /** * @brief The AnalyzedSequences class. * * This is a class to store info about the sequences. * * The object stores a pointer to a const Alphabet. * The way the pointer is managed depends on the method used to set it. * * If the Alphabet is set through a method taking a const Alphabet*, the * caller has to take care of the memory management (i.e. freeing the Alphabet * object). * * If the Alphabet is set through a method that creates the Alphabet object, * such as the one taking a string description of the Alphabet, then the * AnalyzedSequences object deletes the Alphabet object itself on destruction. 
* * Be careful when copying an AnalyzedSequences object: the way the * Alphabet object is managed is copied too. If the initial * AnalyzedSequences takes care of its Alphabet member, then the copy holds * its own copy of the Alphabet and manages it; otherwise the new AnalyzedSequences just * copies the pointer and it is up to the user to take care of its deletion. * * @author Sylvain Gaillard */ class AnalyzedSequences { private: const Alphabet* alphabet_; bool autoset_; public: // Constructor and destructor AnalyzedSequences(); AnalyzedSequences(const Alphabet* alpha); ~AnalyzedSequences(); // Copy constructor AnalyzedSequences(const AnalyzedSequences& as); AnalyzedSequences& operator=(const AnalyzedSequences& as); public: /** * @brief Set the alphabet used for the sequences. */ void setAlphabet(const Alphabet* alpha); /** * @brief Set the alphabet used for the sequences by alphabet type. */ void setAlphabet(const std::string& alpha_type) throw (Exception); /** * @brief Get the alphabet. */ const Alphabet* getAlphabet() const { return alphabet_; } /** * @brief Get the alphabet type as a string. */ std::string getAlphabetType() const; private: void clear_(); }; } // end of namespace bpp; #endif // _ANALYZEDSEQUENCES_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.h000644 000000 000000 00000006742 12147656633 024261 0ustar00rootroot000000 000000 // // File MultiAlleleMonolocusGenotype.h // Author : Sylvain Gaillard // Last modification : Wednesday March 5 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ // Guarded inclusion of the header file #ifndef _MULTIALLELEMONOLOCUSGENOTYPE_H_ #define _MULTIALLELEMONOLOCUSGENOTYPE_H_ // From STL (NOTE(review): the header names of the two following #include directives were lost in this copy of the file) #include #include // From local #include "MonolocusGenotype.h" namespace bpp { /** * @brief The MultiAlleleMonolocusGenotype class. * * This class is intended to handle monolocus genotypes with many alleles, * like polyploid loci or loci obtained from a trace file without cutoff on * peaks or other filter. * * @author Sylvain Gaillard */ class MultiAlleleMonolocusGenotype : public MonolocusGenotype { private: /* NOTE(review): the element type of the std::vector declarations in this class was stripped from this copy; restore it from upstream. */ std::vector allele_index_; public: // Constructors and destructor /** * @brief Build a monolocus genotype containing many alleles. */ MultiAlleleMonolocusGenotype(std::vector allele_index); /** * @brief Copy constructor. */ MultiAlleleMonolocusGenotype(const MultiAlleleMonolocusGenotype& mmg); /** * @brief Destroy the MultiAlleleMonolocusGenotype. 
*/ ~MultiAlleleMonolocusGenotype(); public: // Other methods /** * @brief The assignment operator. */ MultiAlleleMonolocusGenotype& operator=(const MultiAlleleMonolocusGenotype& mmg); /** * @brief The == operator. */ bool operator==(const MultiAlleleMonolocusGenotype& mmg) const; /** * @brief Test the homozygosity of the locus (i.e. all alleles are identical). */ bool isHomozygous() const; /** * @name The MonolocusGenotype interface: * * @{ */ std::vector getAlleleIndex() const; /** @} */ /** * @name The Clonable interface: * * @{ */ MultiAlleleMonolocusGenotype* clone() const; /** @} */ }; } // end of namespace bpp; #endif // _MULTIALLELEMONOLOCUSGENOTYPE_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/MultilocusGenotype.h000644 000000 000000 00000011522 12147656633 022301 0ustar00rootroot000000 000000 // // File MultilocusGenotype.h // Author : Sylvain Gaillard // Last modification : April 4, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _MULTILOCUSGENOTYPE_H_
#define _MULTILOCUSGENOTYPE_H_

// From STL
// NOTE(review): the header names of the three includes below appear to have
// been stripped during extraction — restore them from the upstream file.
#include
#include
#include

// From Pop
#include "MonolocusGenotype.h"
#include "MonolocusGenotypeTools.h"
#include "BiAlleleMonolocusGenotype.h"
#include "MonoAlleleMonolocusGenotype.h"
#include "LocusInfo.h"

namespace bpp
{
/**
 * @brief The MultilocusGenotype class.
 *
 * This is a MonolocusGenotype container.
 *
 * @author Sylvain Gaillard
 */
class MultilocusGenotype
{
private:
  // Per-locus storage; size() is documented below as returning its size.
  // NOTE(review): the template argument of std::vector appears to have been
  // stripped during extraction (here and in the signatures below) — restore
  // it from the upstream file.
  std::vector loci_;

public:
  // Constructors and Destructor
  /**
   * @brief Build a MultilocusGenotype linked to an AnalyzedLoci object.
   *
   * @param loci_number the number of loci.
   * @throw BadIntegerException if loci_number < 1.
   */
  MultilocusGenotype(size_t loci_number) throw (BadIntegerException);

  /**
   * @brief Copy constructor.
   */
  MultilocusGenotype(const MultilocusGenotype& genotype);

  /**
   * @brief Destroy a MultilocusGenotype.
   */
  ~MultilocusGenotype();

public:
  /**
   * @brief Set a MonolocusGenotype.
   *
   * @throw IndexOutOfBoundsException declared in the exception
   *        specification; presumably raised when locus_position exceeds the
   *        number of loci, as for the sibling setters — confirm in the .cpp.
   */
  void setMonolocusGenotype(size_t locus_position,
                            const MonolocusGenotype& monogen) throw (IndexOutOfBoundsException);

  /**
   * @brief Set a MonolocusGenotype by allele keys.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw Exception if there is no key in allele_keys.
   */
  void setMonolocusGenotypeByAlleleKey(size_t locus_position,
                                       const std::vector& allele_keys) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype by allele id.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw AlleleNotFoundException if at least one of the ids is not found in the LocusInfo.
   */
  void setMonolocusGenotypeByAlleleId(size_t locus_position,
                                      const std::vector& allele_id,
                                      const LocusInfo& locus_info) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype as missing data.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   */
  void setMonolocusGenotypeAsMissing(size_t locus_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Tell if a MonolocusGenotype is a missing data.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   */
  bool isMonolocusGenotypeMissing(size_t locus_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get a MonolocusGenotype.
   */
  const MonolocusGenotype& getMonolocusGenotype(size_t locus_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Count the number of loci.
   *
   * Return the size of loci_.
   */
  size_t size() const;

  /**
   * @brief Count the number of non missing MonolocusGenotype.
   */
  size_t countNonMissingLoci() const;

  /**
   * @brief Count the number of homozygous MonolocusGenotype.
   */
  size_t countHomozygousLoci() const;

  /**
   * @brief Count the number of heterozygous MonolocusGenotype.
   */
  size_t countHeterozygousLoci() const;
};
} // end of namespace bpp;

#endif // _MULTILOCUSGENOTYPE_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/SequenceStatistics.h000644 000000 000000 00000123345 12147656633 022260 0ustar00rootroot000000 000000
//
// File SequenceStatistics.h
// Authors: Eric Bazin
//          Sylvain Gaillard
//          Khalid Belkhir
//          Benoit Nabholz
// Created on: Wed Aug 04 2004
//

/*
Copyright or © or Copr.
Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ // Secured inclusion of header's file #ifndef _SEQUENCESTATISTICS_H_ #define _SEQUENCESTATISTICS_H_ // From the SeqLib library #include #include #include #include #include #include #include "PolymorphismSequenceContainer.h" // From the STL #include #include #include namespace bpp { /** * @brief Static class providing methods to compute statistics on sequences data. 
* * @author Sylvain Gaillard */ class SequenceStatistics { public: /** * @brief Compute the number of polymorphic site in an alignment * * The number of polymorphic site is also known as the number of * segregating site @f$S@f$. * * Gaps are consider as mutations so if you want number of * polymorphic site without gap, set the gapflag parameter to true. * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to * take gap into account * @param ignoreUnknown a boolean set by default to true to ignore * unknown states */ static size_t polymorphicSiteNumber( const PolymorphismSequenceContainer& psc, bool gapflag = true, bool ignoreUnknown = true); /** * @brief Compute the number of parsimony informative sites in an alignment * * @param psc a PolymorphicSequenceContainer * @param gapflag a boolean set by default to true if you don't want to * take gap into account */ static size_t parsimonyInformativeSiteNumber( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Count the number of singleton nucleotides in an alignment. * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to * take gap into account * @author Sylvain Gaillard */ static size_t countSingleton( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Count the total number of mutations in an alignment. * * This count is assumed to be under an infinite site model. * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to * take gap into account * @author Sylvain Gaillard */ static size_t totNumberMutations( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Count the total number of mutations in external branchs. * * This is counted as the number of distinct singleton nucleotide * in the ingroup that are not shared with the outgroup. 
* A site is ignored if it contains more than one variant in the outgroup. * A site is ignored if it contains unresolved variants or gaps. * * @param ing a PolymorphismSequenceContainer the ingroup alignement * @param outg a PolymorphismSequenceContainer the outgroup alignement * @throw Exception if ing and outg are not of the same size (site number) * @author Khalid Belkhir */ static size_t totMutationsExternalBranchs( const PolymorphismSequenceContainer& ing, const PolymorphismSequenceContainer& outg) throw (Exception); /** * @brief Compute the number of triplet in an alignment * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to take gap into account * @author Sylvain Glémin */ static size_t tripletNumber( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute the sum of per site heterozygosity in an alignment * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to take gap into account */ static double heterozygosity( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute the sum of per site squared heterozygosity in an alignment * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want * to take gap into account */ static double squaredHeterozygosity( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute the mean GC content in an alignment * * @param psc a PolymorphismSequenceContainer */ static double gcContent( const PolymorphismSequenceContainer& psc); /** * @brief Return the number of GC alleles and the total number of alleles at polymorphic sites only * * G vs C and A vs T polymorphism are not taken into account * * @warning SG 15/03/2010: The code of this method is not clear. See * implementation for more details. 
* * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want * to take gap into account * @return A std::vector of size 2 containing the number of GC alleles * and the total number of alleles. */ static std::vector gcPolymorphism( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute diversity estimator Theta of Watterson (1975, Theor Popul Biol, 7 pp256-276) * * @f[ * \hat{\theta}_S=\frac{S}{a_1} * @f] * where @f$S@f$ is the number of polymorphic sites and @f$a_1@f$ is * describe in SequenceStatistics::_getUsefullValues(). * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gap into account * @param ignoreUnknown a boolean set by default to true to ignore * unknown states * @author Sylvain Gaillard */ static double watterson75( const PolymorphismSequenceContainer& psc, bool gapflag = true, bool ignoreUnknown = true); /** * @brief Compute diversity estimator Theta of Tajima (1983, Genetics, 105 pp437-460) * * @f[ * \hat{\theta}_\pi=1-\sum_{i=1}^{S}\sum_{j=1}^{4}\frac{k_{j,i}\times\left(k_{j,i}-1\right)} * {n_i\times\left(n_i-1\right)} \qquad \textrm{with }k_{j,i}>0 * @f] * where @f$k_{j,i}@f$ is the count of the jth state at the * ith site, * @f$n_i@f$ the number of nucleotides and @f$S@f$ the number of * polymorphic sites. * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gap into account * @author Sylvain Gaillard */ static double tajima83( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute diversity estimator Theta H (eq. 3) of Fay and Wu (2000, Genetics, 155: 1405-1413) * * @param psc a PolymorphismSequenceContainer * @param ancestralSites a Sequence containing the ancestral states * (reconstructed independently) to fold the mutation in the psc SequenceContainer. 
@author Benoit Nabholz */ static double FayWu2000( const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites); /** * @brief Return the number of haplotype in the sample. * Depaulis and Veuille (1998, Mol Biol Evol, 12 pp1788-1790) * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gap into account * @author Éric Bazin * @todo * - remove unneeded Sequence Container recopy * - work on Sequence rather on string */ static size_t DVK( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Return the haplotype diversity of a sample. * Depaulis and Veuille (1998, Mol Biol Evol, 12 pp1788-1790) * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gaps into account * @author Éric Bazin * @todo * - remove unneeded Sequence Container recopy * - work on Sequence rather on string */ static double DVH( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Return the number of transitions. * * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ static size_t getNumberOfTransitions( const PolymorphismSequenceContainer& psc); /** * @brief Return the number of transversions. * * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ static size_t getNumberOfTransversions( const PolymorphismSequenceContainer& psc); /** * @brief Return the ratio of transitions/transversions. 
* * @param psc a PolymorphismSequenceContainer * @author Éric Bazin */ static double getTransitionsTransversionsRatio( const PolymorphismSequenceContainer& psc ) throw (Exception); /** * @brief Compute the number of codon sites with stop codon * * @param psc a PolymorphismSequenceContainer * @param gapflag a boolean set by default to true if you don't want to * take gaps into account * @author Sylvain Glémin */ static size_t stopCodonSiteNumber( const PolymorphismSequenceContainer& psc, bool gapflag = true); /** * @brief Compute the number of polymorphic codon with only one mutated site * * @param psc a PolymorphismSequenceContainer * @param stopflag a boolean set by default to true if you don't want * to take stop codon neither undefined sites into account * @param gapflag a boolean set by default to true if you don't want * to take gaps into account * @author Sylvain Glémin * @bug Sylvain Gaillard 17/03/2010: stopflag don't work as expected * because CompleteSiteIterator don't skip stop codon. */ static size_t monoSitePolymorphicCodonNumber( const PolymorphismSequenceContainer& psc, bool stopflag = true, bool gapflag = true); /** * @brief Compute the number of synonymous polymorphic codon sites * * Gaps and unresolved sites are automatically excluded * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @author Sylvain Glémin * @author Éric Bazin */ static size_t synonymousPolymorphicCodonNumber( const PolymorphismSequenceContainer& psc, const GeneticCode& gc); /** * @brief Compute the Watterson(1975,Theor Popul Biol, 7 pp256-276) estimator for synonymous positions * * Gaps and unresolved sites are automatically excluded * * In case of complex codon, the path that gives the minimum number of * non-synonymous changes* is chosen. The argument minchange=true is sent * to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. 
* * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @author Sylvain Glémin */ static double watterson75Synonymous( const PolymorphismSequenceContainer& psc, const GeneticCode& gc); /** * @brief Compute the Watterson(1975, Theor Popul Biol, 7 pp256-276) estimator for non synonymous positions * * Gaps and unresolved sites are automatically excluded * * In case of complex codon, the path that gives the minimum number of * non-synonymous changes is chosen. The argument minchange=true is sent * to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @author Sylvain Glémin */ static double watterson75NonSynonymous( const PolymorphismSequenceContainer& psc, const GeneticCode& gc); /** * @brief Compute the synonymous nucleotide diversity, pi * * Gaps and unresolved sites are automatically excluded * If minchange = false (default option) the different paths are equally * weighted. * If minchange = true the path with the minimum number of non-synonymous * change is chosen. * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param minchange a boolean set to false * @author Sylvain Glémin * @author Éric Bazin */ static double piSynonymous( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange = false); /** * @brief Compute the non-synonymous nucleotide diversity, pi * * Gaps and unresolved sites are automatically excluded * If minchange = false (default option) the different paths are equally * weighted. * If minchange = true the path with the minimum number of non-synonymous * change is chosen. 
* * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param minchange a boolean set by default to false * @author Sylvain Glémin * @author Éric Bazin */ static double piNonSynonymous( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange = false); /** * @brief compute the mean number of synonymous site in an alignment * * A site is x% synonymous if x% of possible mutations are synonymous. * The transition/transversion can be taken into account (use the * variable ratio). * Gaps and unresolved sites are automatically excluded. * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param ratio a double * @author Sylvain Glémin * @author Éric Bazin */ static double meanSynonymousSitesNumber( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio = 1.); /** * @brief compute the mean number of non-synonymous site in an alignment * * A site is x% synonymous if x% of possible mutations are synonymous * The transition/transversion can be taken into account (use the * variable ratio). * Gaps are automatically excluded * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param ratio a double * @author Éric Bazin */ static double meanNonSynonymousSitesNumber( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio = 1.); /** * @brief compute the number of synonymous subsitutions in an alignment * * Gaps and unresolved sites are automatically excluded * * In case of complex codon, the path that gives the minimum number of * non-synonymous changes is chosen. The argument minchange=true is sent * to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. 
* * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin */ static size_t synonymousSubstitutionsNumber( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin = 0.); /** * @brief compute the number of non synonymous subsitutions in an alignment * * Gaps and unresolved sites are automatically excluded * * In case of complex codon, the path that gives the minimum number of * non-synonymous changes is chosen. The argument minchange=true is sent * to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. * * @param psc a PolymorphismSequenceContainer * @param gc a GeneticCode * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin */ static size_t nonSynonymousSubstitutionsNumber( const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin = 0.); /** * @brief compute the number of fixed differences between two alignements * * Gaps and unresolved sites are automatically excluded * * In case of complex codon, the path that gives the minimum number of * non-synonymous changes is chosen. The argument minchange=true is sent * to numberOfSynonymousDifferences used in this method. * Otherwise, a non-integer number could be return. * @param pscin a PolymorphismSequenceContainer * @param pscout a PolymorphismSequenceContainer * @param psccons a PolymorphismSequenceContainer * @param gc a GeneticCode * @author Sylvain Glémin * @bug Sylvain Gaillard 17.03.2010: should throw something if pscin, * pscout and psccons have different length (site number). 
*/ static std::vector fixedDifferences( const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, const GeneticCode& gc); /** * @brief return a vector containing Pa, Ps, Da, Ds * * Gaps and unresolved sites are automatically excluded * @param ingroup a PolymorphismSequenceContainer * @param outgroup a PolymorphismSequenceContainer * @param gc a GeneticCode * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin * @author Sylvain Glémin */ static std::vector MKtable( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin = 0.); /** * @brief return the neutrality index NI = (Pa/Ps)/(Da/Ds) (Rand & Kann 1996, Mol. Biol. Evol. 13 pp735-748) * * Return -1 if Ps or Da are zero * Gaps and unresolved sites are automatically excluded * * @param ingroup a PolymorphismSequenceContainer * @param outgroup a PolymorphismSequenceContainer * @param gc a GeneticCode * @param freqmin a double, to exclude snp in frequency strictly lower * than freqmin * @author Sylvain Glémin */ static double neutralityIndex( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin = 0.); /** * @brief Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595). * * Calculation using the number of polymorphic (segregating) sites. 
* @f[ * D=\frac{\hat{\theta}_\pi-\hat{\theta}_S}{\sqrt{\textrm{V}\left(\hat{\theta}_\pi-\hat{\theta}_S\right)}} * =\frac{\hat{\theta}_\pi-\hat{\theta}_S}{\sqrt{e_1S+e_2S(S-1)}} * @f] * * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gap into account * @throw ZeroDivisionException if S == 0 * @author Sylvain Gaillard */ static double tajimaDSS( const PolymorphismSequenceContainer& psc, bool gapflag = true) throw (ZeroDivisionException); /** * @brief Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595). * * Calculation using the total number of mutation. * @f[ * D=\frac{\hat{\theta}_\pi-\frac{\eta}{a_1}}{\sqrt{e_1\eta+e_2\eta(\eta-1)}} * @f] * @param psc a PolymorphismSequenceContainer * @param gapflag flag set by default to true if you don't want to * take gap into account * @throw ZeroDivisionException if eta == 0 * @author Sylvain Gaillard */ static double tajimaDTNM( const PolymorphismSequenceContainer& psc, bool gapflag = true) throw (ZeroDivisionException); /** * @brief Return the Fu and Li D test (Fu & Li 1993, Genetics, 133 pp693-709). * * @param ingroup a PolymorphismSequenceContainer * @param outgroup a PolymorphismSequenceContainer * @param original true: use the Fu & Li methode, false: use mutations in * external branch. * @throw ZeroDivisionException if eta == 0 * @author Sylvain Gaillard * @author Khalid Belkhir * * If one set original=false then the number of mutations will be used. * If the outgroup contains more than one sequence the sites with more * than one variant will not be considered for external branch mutations! */ static double fuliD( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original = true) throw (ZeroDivisionException); /** * @brief Return the Fu and Li D* test (Fu & Li 1993, Genetics, 133 pp693-709). 
* * @param group a PolymorphismSequenceContainer * @author Sylvain Gaillard */ static double fuliDstar( const PolymorphismSequenceContainer& group) throw (ZeroDivisionException); /** * @brief Return the Fu and Li F test (Fu & Li 1993, Genetics, 133 pp693-709). * * @param ingroup a PolymorphismSequenceContainer * @param outgroup a PolymorphismSequenceContainer * @param original true: use the Fu & Li methode, false: use mutations in * external branch. * @author Sylvain Gaillard * @author Khalid Belkhir * * If one set original=false then the number of mutations will be used. * If the outgroup contains more than one sequence the sites with more * than one variant will not be considered for external branch mutations! */ static double fuliF( const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original = true) throw (ZeroDivisionException); /** * @brief Return the Fu and Li F* test (Fu & Li 1993, Genetics, 133 pp693-709). * * @param group a PolymorphismSequenceContainer * @author Sylvain Gaillard */ static double fuliFstar( const PolymorphismSequenceContainer& group) throw (ZeroDivisionException); /** * Fst of Hudson, Slatkin and Maddison * * Taken from eq. 3 of Hudson, Slatkin and Maddison 1992 Genetics 132:153 * * @f[ * F_{st} = 1 - \frac{H_w}{H_b} * @f] * where @f$H_w@f$ is mean number of differences between different * sequences sampled from the same subpopulation, and @f$H_b@f$ is the * mean number of differences between sequences sampled from the two * different subpopulations sampled. 
* * @param psc a PolymorphismSequenceContainer will at least two populations * @param id1 is the id of the population 1 * @param id2 is the id of the population 2 * @author Benoit Nabholz */ double FstHudson92( const PolymorphismSequenceContainer& psc, size_t id1, size_t id2); /** * @brief generate a special PolymorphismSequenceContainer for linkage disequilbrium analysis * * Create a PolymorphismSequenceContainer with only polymorphic site : * The value 1 is assigned to the most frequent allele, and 0 to the * least frequent. * This psc is needed to compute Linkage Disequilibrium * Statistics. * Should be used before excluding gaps, but sites with gaps are not * counted as polymorphic sites. * Singleton can be excluded. * Polymorphic site with the lowest frequency < threshold can be excluded. * Only polymorphic sites with 2 alleles are kept. * * @param psc a PolymorphismSequenceContainer * @param keepsingleton a boolean (true by default, false to exclude * singleton) * @param freqmin a float (to exlude site with the lowest allele * frequency less than the threshold given by freqmin, 0 by default) * @author Sylvain Glémin * @bug Sylvain Gaillard 17/03/2010: Needs cleaning and lack of tests * of usability. This methode assume that psc as a DNA alphabet but don't * check for conformity. * @todo * - To be moved to PolymorphismSequenceContainerTools. 
*/ static PolymorphismSequenceContainer* generateLDContainer( const PolymorphismSequenceContainer& psc, bool keepsingleton = true, double freqmin = 0.); /** * @brief give the vector of the pairwise distances between site positions corresponding to a LD SequencePolymorphismContainer * * Assume that all sequences have the same length * * @param psc a PolymorphismSequenceContainer * @param keepsingleton a boolean (true by default, false to exclude * singleton) * @param freqmin a float (to exlude site with the lowest allele * frequency less than the threshold given by freqmin, 0 by default) * @throw DimensionException if the number of sites is lower than 2 * @author Sylvain Glémin */ static Vdouble pairwiseDistances1( const PolymorphismSequenceContainer& psc, bool keepsingleton = true, double freqmin = 0.) throw (DimensionException); /** * @brief give the vector of all mean pairwise distance between two sites to a LD SequencePolymorphismContainer * * pairwise distances are computed for each sequence separately, * excluding gaps. Then the mean is taken over all the sequences. * * @param psc a PolymorphismSequenceContainer * @param keepsingleton a boolean (true by default, false to exclude * singleton) * @param freqmin a float (to exlude site with the lowest allele * frequency less than the threshold given by freqmin, 0 by default) * @throw DimensionException if the number of sites is lower than 2 * @author Sylvain Glémin */ static Vdouble pairwiseDistances2( const PolymorphismSequenceContainer& psc, bool keepsingleton = true, double freqmin = 0.) 
throw (DimensionException);

  /**
   * @brief Give the vector of all mean pairwise D values between two sites (Lewontin & Kojima 1964, Evolution 14 pp458-472)
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble pairwiseD(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the vector of all mean pairwise D' values between two sites (Lewontin 1964, Genetics 49 pp49-67)
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble pairwiseDprime(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the vector of all mean pairwise R² values between two sites (Hill & Robertson 1968, Theor. Appl. Genet., 38 pp226-231)
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble pairwiseR2(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the mean D over all pairwise comparisons
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double meanD(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the mean D' over all pairwise comparisons
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double meanDprime(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the mean R² over all pairwise comparisons
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double meanR2(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the mean pairwise distance between sites / method 1: differences between sequences are not taken into account
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites is lower than 2
   * @author Sylvain Glémin
   */
  static double meanDistance1(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the mean pairwise distance between sites / method 2: differences between sequences are taken into account
   *
   * @param psc a PolymorphismSequenceContainer
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites is lower than 2
   * @author Sylvain Glémin
   */
  static double meanDistance2(
    const PolymorphismSequenceContainer& psc,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope of the regression |D| = 1+a*distance
   *
   * The slope is given in |D| per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double originRegressionD(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope of the regression |D'| = 1+a*distance
   *
   * The slope is given in |D'| per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double originRegressionDprime(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope of the regression R² = 1+a*distance
   *
   * The slope is given in R² per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double originRegressionR2(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope and the origin of the regression |D| = a*distance+b
   *
   * The slope is given in |D| per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble linearRegressionD(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope and the origin of the regression |D'| = a*distance+b
   *
   * The slope is given in |D'| per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble linearRegressionDprime(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope and the origin of the regression R² = a*distance+b
   *
   * The slope is given in R² per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static Vdouble linearRegressionR2(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give the slope of the regression R² = 1/(1+a*distance)
   *
   * To fit the theoretical prediction R² = 1/(1+4Nr)
   * The slope is given in R² per kb
   *
   * @param psc a PolymorphismSequenceContainer
   * @param distance1 a boolean (true to use distance1, false to use
   * distance2, false by default)
   * @param keepsingleton a boolean (true by default, false to exclude
   * singletons)
   * @param freqmin a double (to exclude sites whose lowest allele
   * frequency is less than the threshold given by freqmin, 0 by default)
   * @throw DimensionException if the number of sites or the number of
   * sequences is lower than 2
   * @author Sylvain Glémin
   */
  static double inverseRegressionR2(
    const PolymorphismSequenceContainer& psc,
    bool distance1 = false,
    bool keepsingleton = true,
    double freqmin = 0.)
  throw (DimensionException);

  /**
   * @brief Give an estimate of C=4Nr using Hudson's method (Hudson 1987, Genet. Res., 50 pp245-250)
   *
   * @param psc a PolymorphismSequenceContainer
   * @param precision default value = 0.000001
   * @param cinf initial value, by default cinf = 0.001
   * @param csup initial value, by default csup = 10000
   * @author Sylvain Glémin
   */
  static double hudson87(
    const PolymorphismSequenceContainer& psc,
    double precision = 0.000001,
    double cinf = 0.001,
    double csup = 10000.);

  /**
   * @brief Test useful values
   * @param s an ostream to write the values to
   * @param n the number of observed sequences
   * @author Sylvain Gaillard
   */
  static void testUsefullValues(
    std::ostream& s,
    size_t n);

private:
  /**
   * @brief Count the number of mutations for a site.
   */
  static size_t getMutationNumber_(
    const Site& site);

  /**
   * @brief Count the number of singletons for a site.
   */
  static size_t getSingletonNumber_(
    const Site& site);

  /**
   * @brief Count the number of derived singletons for a site.
   *
   * Counts the singletons of site_in that are not in site_out (a site in
   * the outgroup); site_in is a site from an ingroup.
   * @author Khalid Belkhir
   */
  static size_t getDerivedSingletonNumber_(
    const Site& site_in,
    const Site& site_out);

  /**
   * @brief Get useful values for theta estimators.
   *
   * @param n the number of observed sequences
   *
   * @return A map with 11 values. Keys are a1, a2, a1n, b1, b2, c1, c2,
   * cn, dn, e1 and e2.
   * The values are :
   * @f[
   * a_1=\sum_{i=1}^{n-1}\frac{1}{i} \qquad a_2=\sum_{i=1}^{n-1}\frac{1}{i^2}
   * @f]
   * @f[
   * a_{1n}=\sum_{i=1}^{n}\frac{1}{i}
   * @f]
   * @f[
   * b_1=\frac{n+1}{3(n-1)} \qquad b_2=\frac{2(n^2+n+3)}{9n(n-1)}
   * @f]
   * @f[
   * c_1=b_1-\frac{1}{a_1} \qquad c_2=b_2-\frac{n+2}{a_1n}+\frac{a_2}{a_1^2}
   * @f]
   * @f[
   * c_n=2\frac{na_1-2(n-1)}{(n-1)(n-2)}
   * @f]
   * @f[
   * d_n=c_n+\frac{n-2}{(n-1)^2}+\frac{2}{n-1}\left(\frac{3}{2}-\frac{2a_{1n}-3}{n-2}-\frac{1}{n}\right)
   * @f]
   * @f[
   * e_1=\frac{c_1}{a_1} \qquad e_2=\frac{c_2}{a_1^2+a_2}
   * @f]
   * where @f$n@f$ is the number of observed sequences.
   *
   * NOTE(review): the template arguments of the std::map return type were
   * lost when this text was extracted; presumably a string-to-double map
   * given the keys listed above -- restore from the pristine header.
   *
   * @author Sylvain Gaillard
   */
  static std::map getUsefullValues_(
    size_t n);

  /**
   * @brief Get the vD value of equation (32) in Fu & Li (1993, Genetics, 133 pp693-709)
   *
   * @param n the number of observed sequences
   * @param a1 as described in getUsefullValues_
   * @param a2 as described in getUsefullValues_
   * @param cn as described in getUsefullValues_
   *
   * @return the vD value as double
   *
   * @author Sylvain Gaillard
   */
  static double getVD_(
    size_t n,
    double a1,
    double a2,
    double cn);

  /**
   * @brief Get the uD value of equation (32) in Fu & Li (1993, Genetics, 133 pp693-709)
   *
   * @param a1 as described in getUsefullValues_
   * @param vD as provided by getVD_
   *
   * @return the uD value as double
   *
   * @author Sylvain Gaillard
   */
  static double getUD_(
    double a1,
    double vD);

  /**
   * @brief Get the vD* value of the D* equation in Fu & Li (1993, Genetics, 133 pp693-709)
   *
   * @param n the number of observed sequences
   * @param a1 as described in getUsefullValues_
   * @param a2 as described in getUsefullValues_
   * @param dn as described in getUsefullValues_
   *
   * @return the vD* value as double
   *
   * @author Sylvain Gaillard
   */
  static double getVDstar_(
    size_t n,
    double a1,
    double a2,
    double dn);

  /**
   * @brief Get the uD* value of the D* equation in Fu & Li (1993, Genetics, 133 pp693-709)
   *
   * @param n the number of observed sequences
   * @param a1 as described in getUsefullValues_
   * @param vDs as provided by getVDstar_
   *
   * @return the uD* value as double
   *
   * @author Sylvain Gaillard
   */
  static double getUDstar_(
    size_t n,
    double a1,
    double vDs);

  /**
   * @brief Give the left hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250)
   * This term is used in hudson87
   * @param psc a PolymorphismSequenceContainer
   */
  static double leftHandHudson_(
    const PolymorphismSequenceContainer& psc);

  /**
   * @brief Give the right hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250)
   * This term is used in hudson87
   */
  static double rightHandHudson_(
    double c,
    size_t n);
  /************************************************************************/
};
} // end of namespace bpp;

#endif // _SEQUENCESTATISTICS_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/Date.cpp000644 000000 000000 00000010313 12147656633 017633 0ustar00rootroot000000 000000
//
// File Date.cpp
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include // From Local #include "Date.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ Date::Date(const int day, const int month, const int year) throw (BadIntegerException) : day_(day), month_(month), year_(year) { if (day < 1 || day > 31) throw (BadIntegerException("Date::Date: day must be in [1;31].", day)); if (month < 1 || month > 12) throw (BadIntegerException("Date::Date: month must be in [1;12].", month)); } Date::Date(const Date& date) : day_(date.getDay()), month_(date.getMonth()), year_(date.getYear()) {} // ** Class destructor: ********************************************************/ Date::~Date() {} // ** Other methodes: **********************************************************/ Date& Date::operator=(const Date& date) { day_ = date.getDay(); month_ = date.getMonth(); year_ = date.getYear(); return *this; } void Date::setDate(const int day, const int month, const int year) throw (BadIntegerException) { if (day >= 1 && day <= 31) day_ = day; else throw (BadIntegerException("Date::Date: day must be in [1;31].", day)); if (month >= 1 && month <= 12) month_ = month; else throw (BadIntegerException("Date::Date: month must be in [1;12].", month)); year_ = year; } void Date::setYear(const int year) { year_ = year; } void Date::setMonth(const int month) throw (BadIntegerException) { if (month >= 1 && month <= 12) month_ = month; else throw (BadIntegerException("Date::Date: month must be in [1;12].", month)); } void Date::setDay(const int day) throw (BadIntegerException) { if (day >= 1 && day <= 31) day_ = day; else throw (BadIntegerException("Date::Date: day must be in [1;31].", day)); } std::string Date::getDateStr() const { string date, uDay = "", uMonth = ""; if (day_ < 10) uDay = "0"; if (month_ < 10) uMonth = "0"; date = uDay + 
TextTools::toString(day_) + uMonth + TextTools::toString(month_) + TextTools::toString(year_); return date; } bool Date::operator==(const Date& date) const { if (day_ == date.getDay() && month_ == date.getMonth() && year_ == date.getYear()) return true; else return false; } bool Date::operator<(const Date& date) const { if (year_ < date.getYear() || (month_ < date.getMonth() && year_ == date.getYear()) || (day_ < date.getDay() && month_ == date.getMonth() && year_ == date.getYear())) return true; else return false; } bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismMultiGContainer.h000644 000000 000000 00000013552 12147656633 024122 0ustar00rootroot000000 000000 // // File PolymorphismMultiGContainer.h // Author : Sylvain Gaillard // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _POLYMORPHYSMMULTIGCONTAINER_H_
#define _POLYMORPHYSMMULTIGCONTAINER_H_

// NOTE(review): the header names of the bare #include lines below (and the
// template arguments of the std::vector / std::map / std::set types further
// down) were lost when this text was extracted -- restore them from the
// pristine header.

// From Utils
#include
#include
#include
#include

// From popgenlib
#include "MultilocusGenotype.h"
#include "GeneralExceptions.h"

// From STL
#include
#include
#include
#include

namespace bpp
{
/**
 * @brief The PolymorphismMultiGContainer class
 *
 * This class is a container of MultilocusGenotype. Each stored genotype
 * carries a group id, and a group id may be given a name.
 *
 * @author Sylvain Gaillard
 */
class PolymorphismMultiGContainer
{
private:
  std::vector multilocusGenotypes_;
  std::vector groups_; // group id for each MultilocusGenotype
  std::map groups_names_; // presumably group id -> group name (see getGroupName) -- confirm

public:
  // Constructors and destructor
  /**
   * @brief Build a new PolymorphismMultilocusGenotypeContainer.
   */
  PolymorphismMultiGContainer();

  /**
   * @brief The copy constructor.
   */
  PolymorphismMultiGContainer(const PolymorphismMultiGContainer& pmgc);

  /**
   * @brief Destroy a PolymorphismMultilocusGenotypeContainer.
   */
  ~PolymorphismMultiGContainer();

public:
  /**
   * @brief The assignment operator.
   */
  PolymorphismMultiGContainer& operator=(const PolymorphismMultiGContainer& pmgc);

  /**
   * @brief Add a MultilocusGenotype to the container.
   *
   * @param mg the genotype to add
   * @param group the id of the group the genotype belongs to
   */
  void addMultilocusGenotype(const MultilocusGenotype& mg, size_t group);

  /**
   * @brief Get a MultilocusGenotype at a position.
   *
   * @throw IndexOutOfBoundsException if position exceeds the size of the container.
   */
  const MultilocusGenotype* getMultilocusGenotype(size_t position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Remove a MultilocusGenotype.
   *
   * NOTE(review): presumably hands the removed object back to the caller,
   * who then owns it (contrast with deleteMultilocusGenotype) -- confirm
   * against the implementation.
   *
   * @throw IndexOutOfBoundsException if position exceeds the size of the container.
   */
  MultilocusGenotype* removeMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException);

  /**
   * @brief Delete a MultilocusGenotype.
   *
   * @throw IndexOutOfBoundsException if position exceeds the size of the container.
   */
  void deleteMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException);

  /**
   * @brief Tell if the MultilocusGenotypes are aligned (i.e. same size).
   */
  bool isAligned() const;

  /**
   * @brief Get the number of loci if the MultilocusGenotypes are aligned.
   *
   * @throw Exception if MultilocusGenotypes are not aligned.
   */
  size_t getNumberOfLoci() const throw (Exception);

  /**
   * @brief Get the group id of a MultilocusGenotype.
   *
   * @throw IndexOutOfBoundsException if position exceeds the size of the container.
   */
  size_t getGroupId(size_t position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Set the group id of a MultilocusGenotype.
   *
   * @throw IndexOutOfBoundsException if position exceeds the size of the container.
   */
  void setGroupId(size_t position, size_t group_id) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the groups' ids.
   */
  std::set getAllGroupsIds() const;

  /**
   * @brief Get the groups' names, or their ids when no name is available.
   */
  std::vector getAllGroupsNames() const;

  /**
   * @brief Tell if a group exists.
   */
  bool groupExists(size_t group) const;

  /**
   * @brief Get the number of groups.
   */
  size_t getNumberOfGroups() const;

  /**
   * @brief Get group size.
   */
  size_t getGroupSize(size_t group) const;

  /**
   * @brief Get the group name for a given group id, or just the id if no
   * name is available.
   *
   * @throw GroupNotFoundException declared in the signature; presumably
   * raised when group_id is not a known group -- confirm in the .cpp.
   */
  std::string getGroupName(size_t group_id) const throw (GroupNotFoundException);

  /**
   * @brief Set the name for the given group id.
   *
   * @throw GroupNotFoundException declared in the signature; presumably
   * raised when group_id is not a known group -- confirm in the .cpp.
   */
  void setGroupName(size_t group_id, std::string name) throw (GroupNotFoundException);

  /**
   * @brief Insert a name for the given group id.
   */
  void addGroupName(size_t group_id, std::string name);

  /**
   * @brief Get the size of a group for a given locus.
   */
  size_t getLocusGroupSize(size_t group, size_t locus_position) const;

  /**
   * @brief Get the number of MultilocusGenotype.
   */
  size_t size() const;

  /**
   * @brief Clear the container.
   */
  void clear();
};
} // end of namespace bpp;

#endif // _POLYMORPHYSMMULTIGCONTAINER_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/AbstractODataSet.cpp000644 000000 000000 00000004147 12147656633 022116 0ustar00rootroot000000 000000
//
// File AbstractODataSet.cpp
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "AbstractODataSet.h" using namespace bpp; // From STL #include using namespace std; AbstractODataSet::~AbstractODataSet() {} void AbstractODataSet::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception) { ofstream output(path.c_str(), overwrite ? (ios::out) : (ios::out | ios::app)); write(output, data_set); output.close(); } bpp-popgen-2.1.0/src/Bpp/PopGen/MultilocusGenotype.cpp000644 000000 000000 00000015613 12147656633 022641 0ustar00rootroot000000 000000 // // File MultilocusGenotype.cpp // Author : Sylvain Gaillard // Last modification : April 4, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#include "MultilocusGenotype.h"

using namespace bpp;
using namespace std;

// NOTE(review): every template argument list in this file (vector element
// types, static_cast / dynamic_cast target types) was lost when this text was
// extracted; the bare `vector(...)`, `static_cast(...)` and
// `dynamic_cast(...)` tokens below are kept exactly as found and must be
// restored from the pristine archive before this can compile.

// ** Class constructor: *******************************************************/

/**
 * Build a genotype with loci_number loci, all of them initially missing
 * (null pointer).
 *
 * @throw BadIntegerException if loci_number is lower than 1.
 */
MultilocusGenotype::MultilocusGenotype(size_t loci_number) throw (BadIntegerException) :
  loci_(vector(loci_number))
{
  if (loci_number < 1)
    throw BadIntegerException("MultilocusGenotype::MultilocusGenotype: loci_number must be > 0.", static_cast(loci_number));

  // Set all the loci_ pointers to nullptr
  for (size_t i = 0; i < loci_number; i++)
  {
    loci_[i] = 0;
  }
}

/**
 * Copy constructor: every non-missing locus of @p genotype is cloned;
 * missing loci stay null.
 */
MultilocusGenotype::MultilocusGenotype(const MultilocusGenotype& genotype) :
  loci_(vector(genotype.size()))
{
  for (size_t i = 0; i < genotype.size(); i++)
  {
    if (!genotype.isMonolocusGenotypeMissing(i))
      loci_[i] = dynamic_cast(genotype.getMonolocusGenotype(i).clone());
    else
      loci_[i] = 0;
  }
}

// ** Class destructor: *******************************************************/

/**
 * Delete every stored locus object, then empty the vector.
 */
MultilocusGenotype::~MultilocusGenotype()
{
  for (size_t i = 0; i < loci_.size(); i++)
  {
    delete loci_[i];
  }
  loci_.clear();
}

// ** Other methods: **********************************************************/

/**
 * Store a clone of @p monogen at locus_position.
 *
 * NOTE(review): the pointer previously stored at that position is
 * overwritten without being deleted, while the destructor and
 * setMonolocusGenotypeAsMissing() do delete these pointers -- this looks
 * like a leak when a locus is set twice; confirm and fix (clone first, then
 * delete the old object, in case monogen aliases it).
 *
 * @throw IndexOutOfBoundsException if locus_position is not lower than the
 * number of loci.
 */
void MultilocusGenotype::setMonolocusGenotype(size_t locus_position, const MonolocusGenotype& monogen) throw (IndexOutOfBoundsException)
{
  if (locus_position < loci_.size())
    loci_[locus_position] = dynamic_cast(monogen.clone());
  else
    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotype: locus_position out of bounds.", locus_position, 0, loci_.size());
}

/**
 * Build a MonolocusGenotype from allele keys and store it at locus_position.
 *
 * NOTE(review): the object returned by buildMonolocusGenotypeByAlleleKey is
 * dereferenced and then cloned by setMonolocusGenotype(); if that builder
 * returns a heap object owned by the caller, it is never released here --
 * confirm.
 *
 * @throw Exception if allele_keys is empty.
 * @throw IndexOutOfBoundsException if locus_position is not lower than the
 * number of loci.
 */
void MultilocusGenotype::setMonolocusGenotypeByAlleleKey(size_t locus_position, const std::vector& allele_keys) throw (Exception)
{
  if (allele_keys.size() < 1)
    throw Exception("MultilocusGenotype::setMonolocusGenotypeByAlleleKey: no key in allele_keys.");

  if (locus_position < loci_.size())
  {
    setMonolocusGenotype(locus_position, *MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey(allele_keys));
  }
  else
    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotype: locus_position out of bounds.", locus_position, 0, loci_.size());
}

/**
 * Translate each allele id into its key through locus_info, then delegate
 * to setMonolocusGenotypeByAlleleKey().
 *
 * AlleleNotFoundException and IndexOutOfBoundsException coming from the
 * callees are caught and re-thrown with a message naming this method.
 */
void MultilocusGenotype::setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector& allele_id, const LocusInfo& locus_info) throw (Exception)
{
  vector allele_keys;
  for (size_t i = 0; i < allele_id.size(); i++)
  {
    try
    {
      allele_keys.push_back(locus_info.getAlleleInfoKey(allele_id[i]));
    }
    catch (AlleleNotFoundException& anfe)
    {
      throw AlleleNotFoundException("MultilocusGenotype::setMonolocusGenotypeByAlleleId: id not found.", anfe.getIdentifier());
    }
  }
  try
  {
    setMonolocusGenotypeByAlleleKey(locus_position, allele_keys);
  }
  catch (IndexOutOfBoundsException& ioobe)
  {
    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotypeByAlleleId: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
  }
}

/**
 * Mark a locus as missing: delete the stored object, if any, and reset the
 * pointer to NULL.
 *
 * @throw IndexOutOfBoundsException if locus_position is not lower than the
 * number of loci.
 */
void MultilocusGenotype::setMonolocusGenotypeAsMissing(size_t locus_position) throw (IndexOutOfBoundsException)
{
  if (locus_position >= loci_.size())
    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotypeAsMissing: locus_position out of bounds.", locus_position, 0, loci_.size());
  if (loci_[locus_position] != NULL)
    delete loci_[locus_position];
  loci_[locus_position] = NULL;
}

/**
 * Tell whether the locus at locus_position is missing (stored pointer is
 * NULL).
 *
 * @throw IndexOutOfBoundsException if locus_position is not lower than the
 * number of loci.
 */
bool MultilocusGenotype::isMonolocusGenotypeMissing(size_t locus_position) const throw (IndexOutOfBoundsException)
{
  if (locus_position >= loci_.size())
    throw IndexOutOfBoundsException("MultilocusGenotype::isMonolocusGenotypeMissing: locus_position out of bounds.", locus_position, 0, loci_.size());
  return loci_[locus_position] == NULL;
}

/**
 * Return a reference to the genotype stored at locus_position.
 *
 * NOTE(review): the stored pointer is dereferenced without a NULL check, so
 * calling this on a missing locus dereferences a null pointer; callers in
 * this file test isMonolocusGenotypeMissing() first -- confirm the others do.
 *
 * @throw IndexOutOfBoundsException if locus_position is not lower than the
 * number of loci.
 */
const MonolocusGenotype& MultilocusGenotype::getMonolocusGenotype(size_t locus_position) const throw (IndexOutOfBoundsException)
{
  if (locus_position >= loci_.size())
    throw IndexOutOfBoundsException("MultilocusGenotype::getMonolocusGenotype: locus_position out of bounds", locus_position, 0, loci_.size());
  return *loci_[locus_position];
}

/**
 * Number of loci, missing ones included.
 */
size_t MultilocusGenotype::size() const
{
  return loci_.size();
}

/**
 * Number of loci whose stored pointer is not NULL.
 */
size_t MultilocusGenotype::countNonMissingLoci() const
{
  size_t count = 0;
  for (size_t i = 0; i < loci_.size(); i++)
  {
    if (loci_[i] != NULL)
      count++;
  }
  return count;
}

/**
 * Count the loci for which isHomozygous() returns true.
 *
 * NOTE(review): loci_[i] is NULL for missing loci, and a pointer
 * dynamic_cast yields NULL rather than throwing, so the `->isHomozygous()`
 * call below can run on a null pointer; catch (...) cannot intercept that.
 * This looks like undefined behaviour for missing (or non-matching) loci --
 * confirm and replace the try/catch with an explicit null test.
 */
size_t MultilocusGenotype::countHomozygousLoci() const
{
  size_t count = 0;
  for (size_t i = 0; i < loci_.size(); i++)
  {
    try
    {
      if (dynamic_cast(loci_[i])->isHomozygous())
        count++;
    }
    catch (...)
    {}
  }
  return count;
}

/**
 * Count the loci for which isHomozygous() returns false.
 *
 * NOTE(review): same null-pointer concern as countHomozygousLoci().
 */
size_t MultilocusGenotype::countHeterozygousLoci() const
{
  size_t count = 0;
  for (size_t i = 0; i < loci_.size(); i++)
  {
    try
    {
      if (!(dynamic_cast(loci_[i])->isHomozygous()))
        count++;
    }
    catch (...)
    {}
  }
  return count;
}
bpp-popgen-2.1.0/src/Bpp/PopGen/Genetix.h000644 000000 000000 00000005545 12147656633 020041 0ustar00rootroot000000 000000
//
// File Genetix.h
// Author : Sylvain Gaillard
// Khalid Belkhir
// Last modification : Friday July 30 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #ifndef _GENETIX_H_ #define _GENETIX_H_ #include #include #include #include // From local Pop #include "AbstractIDataSet.h" #include "BasicAlleleInfo.h" namespace bpp { /** * @brief The Genetix input format for popgenlib. * * @author Sylvain Gaillard */ class Genetix : public AbstractIDataSet { public: // Constructor and destructor Genetix(); ~Genetix(); public: /** * @name The IDataSet interface. 
* @{ */ void read(std::istream& is, DataSet& data_set) throw (Exception); void read(const std::string& path, DataSet& data_set) throw (Exception); DataSet* read(std::istream& is) throw (Exception); DataSet* read(const std::string& path) throw (Exception); /** * @} */ /** * @name The IOFormat interface * @{ */ const std::string getFormatName() const { return "Genetix ver 4.05"; } const std::string getFormatDescription() const { return "Genetix is a software for populations genetic for Windows(tm)"; } /** * @} */ }; } // end of namespace bpp; #endif // _GENETIX_H_ bpp-popgen-2.1.0/src/Bpp/PopGen/Locality.h000644 000000 000000 00000007173 12147656633 020215 0ustar00rootroot000000 000000 // // File Locality.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. 
Users are therefore encouraged to load and test the software's
suitability as regards their requirements in conditions enabling the
security of their systems and/or data to be ensured and, more generally,
to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _LOCALITY_H_
#define _LOCALITY_H_

// From std lib
// NOTE(review): both directives below have lost their header name in this
// copy of the file; restore them from the upstream sources.
#include
#include

namespace bpp
{
/**
 * @brief The Locality class.
 *
 * This is a class derived from the Point2D class.
 * It's a Point2D with a name.
 *
 * NOTE(review): the template parameter list and the template arguments of
 * the Point2D base are missing in this copy (the code below uses a type
 * named T for the coordinates); restore them from upstream.
 *
 * @author Sylvain Gaillard
 */
template class Locality :
  public bpp::Point2D
{
protected:
  // Name of the locality; the coordinates live in the Point2D base.
  std::string name_;

public:
  // Constructors and destructor
  /**
   * @brief Build a new locality with name and coordinates.
   *
   * @param name The name of the locality (taken by value).
   * @param x The longitude (defaults to 0).
   * @param y The latitude (defaults to 0).
   */
  Locality(const std::string name, const T x = 0, const T y = 0) :
    bpp::Point2D(x, y),
    name_(name) {}

  /**
   * @brief Build a new locality with name and coordinates.
   *
   * @param name The name of the locality (taken by value).
   * @param coord The coordinates of the locality; copied into the base.
   */
  Locality(const std::string name, const bpp::Point2D &coord) :
    bpp::Point2D(coord),
    name_(name) {}

  /**
   * @brief Destroy a locality.
   */
  virtual ~Locality() {}

public:
  // Methods
  /**
   * @brief Implements the Clonable interface.
   *
   * @return A copy of this object allocated with new.
   */
  Locality* clone() const { return new Locality(*this); }

  /**
   * @brief The == operator.
   *
   * Returns true if both name and coordinates are identical between the two
   * Locality objects. Coordinates are compared with the == of their own
   * type, i.e. exactly.
   */
  virtual bool operator==(const Locality& locality) const
  {
    return this->getX() == locality.getX() && this->getY() == locality.getY() && name_ == locality.name_;
  }

  /**
   * @brief The != operator.
   *
   * Defined as the negation of operator==.
   */
  virtual bool operator!=(const Locality& locality) const
  {
    return !(locality == *this);
  }

  /**
   * @brief Set the name of the locality.
   */
  void setName(const std::string& name) { name_ = name; }

  /**
   * @brief Get the name of the locality.
   *
   * @return A const reference to the stored name.
   */
  const std::string& getName() const { return name_; }
};
} // end of namespace bpp;

#endif // _LOCALITY_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp000644 000000 000000 00000031300 12147656633 025465 0ustar00rootroot000000 000000 //
// File PolymorphismMultiGContainerTools.cpp
// Author : Sylvain Gailard
//          Khalid Belkhir
// Last modification : june 15 2006
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "PolymorphismMultiGContainerTools.h" #include using namespace std; using namespace bpp; /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMultiG(const PolymorphismMultiGContainer& pmgc) { PolymorphismMultiGContainer permuted_pmgc(pmgc); vector groups; for (size_t i = 0; i < permuted_pmgc.size(); i++) { groups.push_back(permuted_pmgc.getGroupId(i)); } // use std::random_shuffle instead of RandomTools::getSampl // groups = RandomTools::getSample(groups, groups.size()); std::random_shuffle(groups.begin(), groups.end()); for (size_t i = 0; i < permuted_pmgc.size(); i++) { permuted_pmgc.setGroupId(i, groups[i]); } return permuted_pmgc; } /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMonoG(const PolymorphismMultiGContainer& pmgc, const std::set& groups) { PolymorphismMultiGContainer permuted_pmgc; size_t loc_num = pmgc.getNumberOfLoci(); vector > mono_gens; mono_gens.resize(loc_num); // Get all the MonolocusGenotypes to permut for (size_t i = 0; i < pmgc.size(); i++) { if (groups.find(pmgc.getGroupId(i)) != groups.end()) { for (size_t j = 0; j < loc_num; j++) { mono_gens[j].push_back(&pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j)); } } } // Permut the MonolocusGenotypes for (size_t i = 0; i < loc_num; i++) { // mono_gens[i] = RandomTools::getSample(mono_gens[i], mono_gens[i].size()); std::random_shuffle(mono_gens[i].begin(), mono_gens[i].end()); } // Build the new PolymorphismMultiGContainer size_t k = 0; for (size_t i = 0; i < pmgc.size(); i++) { if (groups.find(pmgc.getGroupId(i)) != groups.end()) { MultilocusGenotype tmp_mg(loc_num); for (size_t j = 0; j < loc_num; j++) { if (mono_gens[j][k] != NULL) 
tmp_mg.setMonolocusGenotype(j, *(mono_gens[j][k])); } permuted_pmgc.addMultilocusGenotype(tmp_mg, pmgc.getGroupId(i)); k++; } else { permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), pmgc.getGroupId(i)); } } // update groups names set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); permuted_pmgc.setGroupName(id, name); } return permuted_pmgc; } /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupMonoG(const PolymorphismMultiGContainer& pmgc, const std::set& groups) { PolymorphismMultiGContainer permuted_pmgc; size_t loc_num = pmgc.getNumberOfLoci(); vector > mono_gens; mono_gens.resize(loc_num); for (set::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group { size_t nb_ind_in_group = 0; // Get all the MonolocusGenotypes of group g to permut for (size_t i = 0; i < pmgc.size(); i++) { size_t indiv_grp = pmgc.getGroupId(i); if (groups.find(indiv_grp) != groups.end()) { if (indiv_grp == *g) { nb_ind_in_group++; for (size_t j = 0; j < loc_num; j++) { mono_gens[j].push_back(&pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j)); } } } else // insert as is { permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp); } } // for i // Permut the MonolocusGenotypes if (nb_ind_in_group > 0) { for (size_t j = 0; j < loc_num; j++) { // mono_gens[j] = RandomTools::getSample(mono_gens[j], mono_gens[j].size()); std::random_shuffle(mono_gens[j].begin(), mono_gens[j].end()); } // Build the new multilocus genotypes MultilocusGenotype tmp_mg(loc_num); for (size_t k = 0; k < nb_ind_in_group; k++) { for (size_t j = 0; j < loc_num; j++) { if (mono_gens[j][k] != NULL) tmp_mg.setMonolocusGenotype(j, *(mono_gens[j][k])); } // for j permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g)); } // for k } // if 
nb_ind_in_group } // for g // update groups names set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); permuted_pmgc.setGroupName(id, name); } return permuted_pmgc; } /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutAlleles(const PolymorphismMultiGContainer& pmgc, const std::set& groups) { PolymorphismMultiGContainer permuted_pmgc; size_t loc_num = pmgc.getNumberOfLoci(); vector > alleles; alleles.resize(loc_num); // Get all the alleles to permut for (size_t i = 0; i < pmgc.size(); i++) { if (groups.find(pmgc.getGroupId(i)) != groups.end()) { for (size_t j = 0; j < loc_num; j++) { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(j)) for (size_t k = 0; k < pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size(); k++) { alleles[j].push_back(pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex()[k]); } } } } // Permut the alleles for (size_t i = 0; i < loc_num; i++) { // alleles[i] = RandomTools::getSample(alleles[i], alleles[i].size()); std::random_shuffle(alleles[i].begin(), alleles[i].end()); } // Build the new PolymorphismMultiGContainer vector k(loc_num, 0); for (size_t i = 0; i < pmgc.size(); i++) { if (groups.find(pmgc.getGroupId(i)) != groups.end()) { MultilocusGenotype tmp_mg(loc_num); for (size_t j = 0; j < loc_num; j++) { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(j)) { if (pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size() == 1) tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++])); if (pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size() == 2) tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++])); } } permuted_pmgc.addMultilocusGenotype(tmp_mg, 
pmgc.getGroupId(i)); } else { permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), pmgc.getGroupId(i)); } } // update groups names set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); permuted_pmgc.setGroupName(id, name); } return permuted_pmgc; } /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupAlleles(const PolymorphismMultiGContainer& pmgc, const std::set& groups) { PolymorphismMultiGContainer permuted_pmgc; size_t loc_num = pmgc.getNumberOfLoci(); vector > alleles; alleles.resize(loc_num); for (set::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group { int nb_ind_in_group = 0; vector< vector > nb_alleles_for_inds; nb_alleles_for_inds.resize(loc_num); // Get all the alleles to permut for (size_t i = 0; i < pmgc.size(); i++) { size_t indiv_grp = pmgc.getGroupId(i); if (groups.find(indiv_grp) != groups.end() ) { if (indiv_grp == *g) { nb_ind_in_group++; for (size_t j = 0; j < loc_num; j++) { if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(j)) { size_t nb_alls = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size(); nb_alleles_for_inds[j].push_back(nb_alls); for (size_t k = 0; k < nb_alls; k++) { alleles[j].push_back(pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex()[k]); } } } } } else // inserer tel quel { permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp); } } // for i // Permut the alleles if (nb_ind_in_group > 0) { for (size_t i = 0; i < loc_num; i++) { // alleles[i] = RandomTools::getSample(alleles[i], alleles[i].size()); std::random_shuffle(alleles[i].begin(), alleles[i].end()); } // Build the new PolymorphismMultiGContainer vector k(loc_num, 0); for (int ind = 0; ind < nb_ind_in_group; ind++) { 
MultilocusGenotype tmp_mg(loc_num); for (size_t j = 0; j < loc_num; j++) { if (nb_alleles_for_inds[j][ind] == 1) tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++])); if (nb_alleles_for_inds[j][ind] == 2) tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++])); } // for j permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g)); } // for ind } // if nb_ind_in_group } // for g // update groups names set grp_ids = pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); permuted_pmgc.setGroupName(id, name); } return permuted_pmgc; } /******************************************************************************/ PolymorphismMultiGContainer PolymorphismMultiGContainerTools::extractGroups(const PolymorphismMultiGContainer& pmgc, const std::set& groups) { PolymorphismMultiGContainer sub_pmgc; for (set::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group { // Get all the MonolocusGenotypes of group g to extract for (size_t i = 0; i < pmgc.size(); i++) { size_t indiv_grp = pmgc.getGroupId(i); if (groups.find(indiv_grp) != groups.end() ) { if (indiv_grp == *g) { sub_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp); } } } // for i } // for g // update groups names set grp_ids = sub_pmgc.getAllGroupsIds(); for (set::iterator it = grp_ids.begin(); it != grp_ids.end(); it++) { size_t id = *it; string name = pmgc.getGroupName(id); sub_pmgc.setGroupName(id, name); } return sub_pmgc; } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/DataSetTools.h000644 000000 000000 00000005133 12147656633 020775 0ustar00rootroot000000 000000 // // File DataSetTools.h // Author : Sylvain Gaillard // Last modification : Wednesday August 04 2004 // /* Copyright or © or Copr. 
CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _DATASETTOOLS_H_
#define _DATASETTOOLS_H_

// From STL
// NOTE(review): the bare directives below have lost their header name in
// this copy of the file; restore them from the upstream sources.
#include
#include
#include
#include

// From SeqLib
#include

// From local PopGenLib
#include "DataSet.h"
#include "PolymorphismSequenceContainer.h"

namespace bpp
{
/**
 * @brief A set of tools for DataSet.
 *
 * Only static members are declared; they are defined out of line.
 *
 * @author Sylvain Gaillard
 */
class DataSetTools
{
public:
  /**
   * @brief General method to build a DataSet from an OrderedSequenceContainer.
   *
   * NOTE(review): the template argument of the std::auto_ptr return type is
   * missing in this copy (presumably the DataSet being built -- confirm).
   *
   * @param osc The sequence container to build from.
   */
  static std::auto_ptr buildDataSet(const OrderedSequenceContainer& osc) throw (Exception);

  /**
   * @brief Specific method to build a DataSet from a PolymorphismSequenceContainer.
   *
   * @param psc The polymorphism sequence container to build from.
   */
  static std::auto_ptr buildDataSet(const PolymorphismSequenceContainer& psc) throw (Exception);
};
} // end of namespace bpp;

#endif // _DATASETTOOLS_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/LocusInfo.h000644 000000 000000 00000010303 12147656633 020323 0ustar00rootroot000000 000000 //
// File LocusInfo.h
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _LOCUSINFO_H_
#define _LOCUSINFO_H_

// From STL
// NOTE(review): the bare directives in this block have lost their header
// name in this copy of the file; restore them from the upstream sources.
#include
#include

// From local Popgenlib
#include "AlleleInfo.h"
#include "GeneralExceptions.h"

#include

namespace bpp
{
/**
 * @brief The LocusInfo class.
 *
 * This is an AlleleInfo container with additional data like a name,
 * the ploidy and some comments.
 *
 * @author Sylvain Gaillard
 */
class LocusInfo
{
private:
  std::string name_;
  unsigned int ploidy_;
  // NOTE(review): the element type of this vector is missing in this copy.
  std::vector alleles_;

public:
  // Ploidy codes. They are declared as plain (non-const) statics; their
  // values are defined out of line.
  static unsigned int HAPLODIPLOID;
  static unsigned int HAPLOID;
  static unsigned int DIPLOID;
  static unsigned int UNKNOWN;

public:
  // Constructors and destructor
  /**
   * @brief Build a new LocusInfo object.
   *
   * @param name The name of the locus.
   * @param ploidy The ploidy of the locus (defaults to DIPLOID).
   */
  LocusInfo(const std::string& name, const unsigned int ploidy = DIPLOID);

  /**
   * @brief Copy constructor.
   */
  LocusInfo(const LocusInfo& locus_info);

  /**
   * @brief Destroy the LocusInfo.
   */
  virtual ~LocusInfo();

public:
  // Methods
  /**
   * @brief Get the name of the locus.
   */
  const std::string& getName() const { return name_; }

  /**
   * @brief Get the ploidy of the locus.
   *
   * @return The ploidy as an unsigned integer.
   */
  unsigned int getPloidy() const { return ploidy_; }

  /**
   * @brief Add an AlleleInfo to the LocusInfo.
   *
   * @throw BadIdentifierException if the AlleleInfo's id already exists.
   */
  void addAlleleInfo(const AlleleInfo& allele) throw (BadIdentifierException);

  /**
   * @brief Retrieve an AlleleInfo object of the LocusInfo.
   *
   * @throw AlleleNotFoundException if the id is not found.
   */
  const AlleleInfo& getAlleleInfoById(const std::string& id) const throw (AlleleNotFoundException);

  /**
   * @brief Retrieve an AlleleInfo object of the LocusInfo.
   *
   * @throw IndexOutOfBoundsException if key exceeds the number of alleles.
   */
  const AlleleInfo& getAlleleInfoByKey(size_t key) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get the position of an AlleleInfo.
   *
   * Note that the position is returned as an unsigned int whereas
   * getAlleleInfoByKey() takes a size_t.
   *
   * @throw AlleleNotFoundException if the AlleleInfo's id is not found.
   */
  unsigned int getAlleleInfoKey(const std::string& id) const throw (AlleleNotFoundException);

  /**
   * @brief Get the number of alleles at this locus.
   */
  size_t getNumberOfAlleles() const;

  /**
   * @brief Delete all alleles from the locus.
   */
  void clear();
};
} // end of namespace bpp;

#endif // _LOCUSINFO_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h000644 000000 000000 00000022502 12147656633 025505 0ustar00rootroot000000 000000 //
// File: PolymorphismSequenceContainerTools.h
// Authors: Eric Bazin
//          Sylvain Gaillard
// Created on: Thursday July 29 2004
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's
suitability as regards their requirements in conditions enabling the
security of their systems and/or data to be ensured and, more generally,
to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
#define _POLYMORPHISMSEQUENCECONTAINERTOOL_H_

// NOTE(review): every bare directive in this block has lost its header name
// in this copy of the file; restore them from the upstream sources.
#include
#include

// from SeqLib
#include
#include
#include
#include
#include
#include
#include

// from STL
#include

// From Local
#include "PolymorphismSequenceContainer.h"
#include "GeneralExceptions.h"

namespace bpp
{
/**
 * @brief Utility functions to manipulate PolymorphismSequenceContainer
 *
 * All the tools are static member functions. Those returning a
 * PolymorphismSequenceContainer return it as a raw pointer.
 * NOTE(review): the returned object is presumably newly allocated and owned
 * by the caller -- confirm against the implementation.
 *
 * @author Sylvain Gaillard
 */
class PolymorphismSequenceContainerTools
{
public:
  // Class destructor:
  ~PolymorphismSequenceContainerTools();

  /*******************************************************************************/

public:
  /**
   * @brief Read a Mase+ file and return a PolymorphismSequenceContainer. Toggle Sequence
   * when selection tag begin with OUTGROUP (see Polymorphix)
   *
   * @param path Path to the Mase+ file
   * @param alpha Sequence Alphabet
   *
   * @throw Exception if the file is not in the specified format
   */
  static PolymorphismSequenceContainer* read(const std::string& path, const Alphabet* alpha) throw (Exception);

  /**
   * @brief Extract ingroup sequences from a PolymorphismSequenceContainer and create a new one.
   *
   * @param psc a PolymorphismSequenceContainer reference
   *
   * @throw Exception if there is no ingroup sequence
   */
  static PolymorphismSequenceContainer* extractIngroup (const PolymorphismSequenceContainer& psc) throw (Exception);

  /**
   * @brief Extract outgroup sequences from a PolymorphismSequenceContainer and create a new one.
   *
   * @param psc a PolymorphismSequenceContainer reference
   *
   * @throw Exception if there is no outgroup sequence
   */
  static PolymorphismSequenceContainer* extractOutgroup (const PolymorphismSequenceContainer& psc) throw (Exception);

  /**
   * @brief Extract a special group from the PolymorphismSequenceContainer.
   *
   * @param psc a PolymorphismSequenceContainer reference.
   * @param group_id the group identifier as a size_t.
   *
   * @throw GroupNotFoundException if group_id is not found.
   */
  static PolymorphismSequenceContainer* extractGroup(const PolymorphismSequenceContainer& psc, size_t group_id) throw (Exception);

  /**
   * @brief Extract selected sequences
   *
   * @param psc a PolymorphismSequenceContainer reference.
   * @param ss a sequence selection.
   *
   */
  static PolymorphismSequenceContainer* getSelectedSequences(const PolymorphismSequenceContainer& psc, const SequenceSelection& ss);

  /**
   * @brief Get a random set of sequences
   *
   * @param psc a PolymorphismSequenceContainer reference
   * @param n the number of sequences to get
   * @param replace a boolean flag, true for sampling with replacement (the default)
   */
  static PolymorphismSequenceContainer* sample(const PolymorphismSequenceContainer& psc, size_t n, bool replace = true);

  /**
   * @brief Retrieves sites without gaps from PolymorphismSequenceContainer.
   *
   * @param psc a PolymorphismSequenceContainer reference
   */
  static PolymorphismSequenceContainer* getSitesWithoutGaps (const PolymorphismSequenceContainer& psc);

  /**
   * @brief Return number of sites without gaps in a PolymorphismSequenceContainer.
   *
   * @param psc a PolymorphismSequenceContainer reference
   * @param ingroup a boolean set to true if you want to take only ingroup sequences into account
   *
   * @throw Exception if there is no ingroup sequence
   */
  static size_t getNumberOfNonGapSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception);

  /**
   * @brief Return number of completely resolved sites in a PolymorphismSequenceContainer.
   *
   *
   * @param psc a PolymorphismSequenceContainer reference
   * @param ingroup a boolean set to true if you want to take only ingroup sequences into account
   *
   * @throw Exception if there is no ingroup sequence
   */
  static size_t getNumberOfCompleteSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception);

  /**
   * @brief Retrieves complete sites from a PolymorphismSequenceContainer.
   *
   * @param psc a PolymorphismSequenceContainer reference
   */
  static PolymorphismSequenceContainer* getCompleteSites(const PolymorphismSequenceContainer& psc);

  /**
   * @brief Exclude flanking sites with gap but keep gap sites within the alignment
   *
   * @param psc a PolymorphismSequenceContainer reference
   */
  static PolymorphismSequenceContainer* excludeFlankingGap(const PolymorphismSequenceContainer& psc);

  /**
   * @brief Get a PolymorphismSequenceContainer corresponding to a site selection annotated in the mase comments
   *
   * Be careful: in the new PolymorphismSequenceContainer the mase comments are lost.
   * Information about cds positions and start codon is no longer available.
   *
   * @param psc a PolymorphismSequenceContainer.
   * @param setName The name of the set to retrieve.
   * @param phase a boolean set to true if you want to take the phase into account during the extraction. It removes the useless sites.
   */
  static PolymorphismSequenceContainer* getSelectedSites(const PolymorphismSequenceContainer& psc, const std::string& setName, bool phase);

  /**
   * @brief Retrieve non-coding sites defined in the mase file header
   *
   * Be careful: to be used before excluding gaps
   *
   * @param psc a PolymorphismSequenceContainer reference
   * @param setName name of the CDS site selection
   */
  static PolymorphismSequenceContainer* getNonCodingSites(const PolymorphismSequenceContainer& psc, const std::string& setName);

  /**
   * @brief Retrieve sites at one codon position (1,2,3)
   *
   * Be careful: to be used before excluding gaps
   * Be careful: if there is no phase information, the method catches an exception and sets the phase to 1
   * This allows to use this method for PolymorphismSequenceContainer generated by getSelectedSequence
   *
   * @param psc a PolymorphismSequenceContainer reference
   * @param setName name of the CDS site selection
   * @param pos position index.
   */
  static PolymorphismSequenceContainer* getOnePosition(const PolymorphismSequenceContainer& psc, const std::string& setName, size_t pos);

  /**
   * @brief Retrieve intron sites
   *
   * Same as getNonCodingSites but excludes the 5' and 3' flanking regions if there are any
   *
   * @param psc a PolymorphismSequenceContainer
   * @param setName name of the CDS site selection
   * @param ca a codon alphabet
   */
  static PolymorphismSequenceContainer* getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca );

  /**
   * @brief Retrieve 5' sites
   *
   * @param psc a PolymorphismSequenceContainer
   * @param setName name of the CDS site selection
   */
  static PolymorphismSequenceContainer* get5Prime(const PolymorphismSequenceContainer& psc, const std::string& setName);

  /**
   * @brief Retrieve 3' sites
   *
   * @param psc a PolymorphismSequenceContainer
   * @param setName name of the CDS site selection
   * @param ca a codon alphabet
   */
  static PolymorphismSequenceContainer* get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca );

  /**
   * @brief Get the species name of the ingroup
   *
   * @param psc a PolymorphismSequenceContainer.
   */
  static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer& psc);
};
} // end of namespace bpp;

#endif // _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/IDataSet.h000644 000000 000000 00000005111 12147656633 020061 0ustar00rootroot000000 000000 //
// File IDataSet.h
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _IDATASET_H_
#define _IDATASET_H_

#include "IODataSet.h"
// NOTE(review): the directive below has lost its header name in this copy of
// the file; restore it from the upstream sources.
#include

namespace bpp
{
/**
 * @brief The IDataSet interface.
 *
 * Abstract reading side of the DataSet input/output hierarchy: it inherits
 * virtually from IODataSet and declares only pure virtual read() overloads.
 *
 * @author Sylvain Gaillard
 */
class IDataSet : public virtual IODataSet
{
public:
  // Class destructor
  virtual ~IDataSet() {}

public:
  /**
   * @brief Read a DataSet on istream.
   *
   * @param is The input stream to read from.
   * @param data_set The DataSet passed by reference to the reader.
   */
  virtual void read(std::istream& is, DataSet& data_set) throw (Exception) = 0;

  /**
   * @brief Read a DataSet from a text file.
   *
   * @param path The path of the file to read.
   * @param data_set The DataSet passed by reference to the reader.
   */
  virtual void read(const std::string& path, DataSet& data_set) throw (Exception) = 0;

  /**
   * @brief Read istream and return a DataSet.
   *
   * @param is The input stream to read from.
   */
  virtual DataSet* read(std::istream& is) throw (Exception) = 0;

  /**
   * @brief Read a text file and return a DataSet.
   *
   * @param path The path of the file to read.
   */
  virtual DataSet* read(const std::string& path) throw (Exception) = 0;
};
} // end of namespace bpp;

#endif // _IDATASET_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/ODataSet.h000644 000000 000000 00000004556 12147656633 020103 0ustar00rootroot000000 000000 //
// File ODataSet.h
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

#ifndef _ODATASET_H_
#define _ODATASET_H_

#include "IODataSet.h"
// NOTE(review): the header name of the following #include was lost when this
// copy of the file was flattened; restore it from the upstream source.
#include

namespace bpp
{
/**
 * @brief The ODataSet interface.
 *
 * Output side of the DataSet I/O hierarchy: it extends IODataSet (virtual
 * inheritance) with two pure virtual, const write() overloads that
 * implementations must provide.
 *
 * @author Sylvain Gaillard
 */
class ODataSet :
  public virtual IODataSet
{
public:
  // Class destructor (virtual so that objects can be destroyed through an ODataSet pointer).
  virtual ~ODataSet() {}

public:
  /**
   * @brief Write a DataSet to an output stream.
   *
   * @param os The output stream to write to.
   * @param data_set The DataSet to write (not modified: passed by const reference).
   * @throw Exception as declared by the exception specification.
   */
  virtual void write(std::ostream& os, const DataSet& data_set) const throw (Exception) = 0;

  /**
   * @brief Write a DataSet to a text file.
   *
   * @param path The path of the file to write.
   * @param data_set The DataSet to write (not modified: passed by const reference).
   * @param overwrite Flag forwarded to the implementation
   *                  (presumably: replace an existing file when true, append otherwise -- confirm).
   * @throw Exception as declared by the exception specification.
   */
  virtual void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception) = 0;
};
} // end of namespace bpp;

#endif // _ODATASET_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/DarwinDon.cpp000644 000000 000000 00000005627 12147656633 020657 0ustar00rootroot000000 000000 // // File DarwinDon.cpp // Authors : Sylvain Gaillard // Last modification : April 7, 2008 // /* Copyright or © or Copr. CNRS, (April 7, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software.
You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "DarwinDon.h" #include using namespace bpp; using namespace std; DarwinDon::DarwinDon() {} DarwinDon::~DarwinDon() {} void DarwinDon::write(ostream& os, const DataSet& data_set) const throw (Exception) { if (!os) throw IOException("DarwinDon::write: fail to open stream."); StlOutputStreamWrapper out(&os); (out << "@DARwin 5.0 - DON").endLine(); size_t ind_nbr = 0; for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { ind_nbr += data_set.getNumberOfIndividualsInGroup(i); } vector header; header.push_back("N°"); header.push_back("Name"); (out << ind_nbr << "\t" << header.size() - 1).endLine(); VectorTools::print(header, out, "\t"); // size_t ind_index = 0; for (size_t i = 0; i < data_set.getNumberOfGroups(); i++) { size_t ind_nbr_ig = data_set.getNumberOfIndividualsInGroup(i); for (size_t j = 0; j < ind_nbr_ig; j++) { (out << j + (i * ind_nbr_ig) + 1 << "\t" << data_set.getIndividualAtPositionFromGroup(i, j)->getId()).endLine(); } } } void DarwinDon::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception) { AbstractODataSet::write(path, data_set, overwrite); } bpp-popgen-2.1.0/src/Bpp/PopGen/AnalyzedSequences.cpp000644 000000 000000 00000007335 12147656633 022413 0ustar00rootroot000000 000000 // // File AnalyzedSequences.cpp // Created by: Sylvain Gaillard // Created on: Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". 
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "AnalyzedSequences.h" #include #include #include using namespace bpp; using namespace std; AnalyzedSequences::AnalyzedSequences() : alphabet_(0), autoset_(false) {} AnalyzedSequences::AnalyzedSequences(const Alphabet* alpha) : alphabet_(alpha), autoset_(false) {} AnalyzedSequences::~AnalyzedSequences() { clear_(); } AnalyzedSequences::AnalyzedSequences(const AnalyzedSequences& as) : alphabet_(0), autoset_(false) { if (as.autoset_) { setAlphabet(as.getAlphabetType()); } else { alphabet_ = as.alphabet_; } autoset_ = as.autoset_; } AnalyzedSequences& AnalyzedSequences::operator=(const AnalyzedSequences& as) { if (as.autoset_) { setAlphabet(as.getAlphabetType()); } else { alphabet_ = as.alphabet_; } autoset_ = as.autoset_; return *this; } void AnalyzedSequences::setAlphabet(const Alphabet* alpha) { alphabet_ = alpha; autoset_ = false; } void AnalyzedSequences::setAlphabet(const std::string& alpha_type) throw (Exception) { if (alpha_type != string("DNA") && alpha_type != string("RNA") && alpha_type != string("PROTEIN")) throw Exception(string("AnalyzedSequences::setAlphabet: bad alphabet type. 
(") + alpha_type + string(").")); Alphabet* alpha = 0; if (alpha_type == string("DNA")) alpha = new DNA(); if (alpha_type == string("RNA")) alpha = new RNA(); if (alpha_type == string("PROTEIN")) alpha = new ProteicAlphabet(); alphabet_ = alpha; autoset_ = true; } std::string AnalyzedSequences::getAlphabetType() const { if (alphabet_ == 0) return string("---"); string alpha_type = alphabet_->getAlphabetType(); size_t bs = alpha_type.find(" ", 0); alpha_type = string(alpha_type.begin(), alpha_type.begin() + bs); if (alpha_type == "Proteic") alpha_type = "PROTEIN"; return alpha_type; } void AnalyzedSequences::clear_() { if (alphabet_ != 0 && autoset_) { delete alphabet_; alphabet_ = 0; autoset_ = false; } } bpp-popgen-2.1.0/src/Bpp/PopGen/DataSet.h000644 000000 000000 00000065301 12147656633 017757 0ustar00rootroot000000 000000 // // File DataSet.h // Author : Sylvain Gaillard // Last modification : April 4, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */

#ifndef _DATASET_H_
#define _DATASET_H_

// From the STL
// NOTE(review): the header names of the following seven #include lines were
// lost when this copy of the file was flattened; restore them from the
// upstream source.
#include
#include
#include
#include
#include
#include
#include

// From PopGenLib (local)
#include "Group.h"
#include "Individual.h"
#include "Locality.h"
#include "GeneralExceptions.h"
#include "AnalyzedLoci.h"
#include "AnalyzedSequences.h"
#include "PolymorphismMultiGContainer.h"
#include "PolymorphismSequenceContainer.h"

namespace bpp
{
/**
 * @brief The DataSet class.
 *
 * A DataSet is the object that manages all the data on which one can compute
 * some statistics: localities, groups of individuals, and the description of
 * the analyzed loci and sequences.
 *
 * NOTE(review): template argument lists (the angle-bracket parts, e.g. of
 * std::vector, std::map, Locality and Point2D) appear to have been stripped
 * from this copy of the file; the declarations below are left as found and
 * must be restored from the upstream source before compiling.
 *
 * @author Sylvain Gaillard
 */
class DataSet
{
private:
  AnalyzedLoci* analyzedLoci_;           // see the "AnalyzedLoci manipulation" section
  AnalyzedSequences* analyzedSequences_; // see the "AnalyzedSequences manipulation" section
  std::vector*> localities_;             // see the "Locality manipulation" section
  std::vector groups_;                   // see the "Group manipulation" section

public:
  // Constructor and destructor
  /**
   * @brief Build a new void DataSet.
   */
  DataSet();

  /**
   * @brief Destroy a DataSet.
   */
  ~DataSet();

  /**
   * @brief Copy constructor.
   */
  DataSet(const DataSet& ds);

  /**
   * @brief Assignment operator.
   */
  DataSet& operator=(const DataSet& ds);

public:
  // Methods
  // ** Locality manipulation ***************************************************/
  /**
   * @brief Add a locality to the DataSet.
   *
   * @param locality A Locality object.
   * @throw BadIdentifierException if the locality's name already exists.
   */
  void addLocality(Locality& locality) throw (BadIdentifierException);

  /**
   * @brief Get the position of a locality in the container.
   *
   * @return The locality_position (position) of the Locality.
   * @param name The locality's name to find.
   * @throw LocalityNotFoundException if the locality's name doesn't match any name in the DataSet.
   */
  size_t getLocalityPosition(const std::string& name) const throw (LocalityNotFoundException);

  /**
   * @brief Get a Locality by locality_position.
   *
   * @return A const reference to the locality matching the locality_position.
   * @param locality_position The position of the Locality in the DataSet.
   * @throw IndexOutOfBoundsException if locality_position exceeds the number of localities of the DataSet.
   */
  const Locality& getLocalityAtPosition(size_t locality_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get a Locality by name.
   *
   * @throw LocalityNotFoundException if the locality's name is not found.
   */
  const Locality& getLocalityByName(const std::string& name) const throw (LocalityNotFoundException);

  /**
   * @brief Delete a Locality from the DataSet.
   *
   * @throw IndexOutOfBoundsException if locality_position exceeds the number of localities.
   */
  void deleteLocalityAtPosition(size_t locality_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Delete a Locality from the DataSet.
   *
   * @throw LocalityNotFoundException if the locality's name is not found.
   */
  void deleteLocalityByName(const std::string& name) throw (LocalityNotFoundException);

  /**
   * @brief Get the number of Localities.
   */
  size_t getNumberOfLocalities() const;

  /**
   * @brief Tell if there is at least one locality.
   */
  bool hasLocality() const;

  // ** Group manipulation ******************************************************/
  /**
   * @brief Add a Group to the DataSet.
   *
   * @param group A const reference to the Group to add.
   * @throw BadIdentifierException as declared by the exception specification
   *        (presumably if the group's identifier is already in use -- confirm).
   */
  void addGroup(const Group& group) throw (BadIdentifierException);

  /**
   * @brief Add an empty Group to the DataSet.
   *
   * @throw BadIdentifierException as declared by the exception specification
   *        (presumably if group_id is already in use -- confirm).
   */
  void addEmptyGroup(size_t group_id) throw (BadIdentifierException);

  /**
   * @brief Get a group by identifier.
   *
   * @throw GroupNotFoundException as declared by the exception specification.
   */
  const Group& getGroupById(size_t group_id) const throw (GroupNotFoundException);

  /**
   * @brief Get the position of a Group.
   *
   * @throw GroupNotFoundException if the group_id is not found.
   */
  size_t getGroupPosition(size_t group_id) const throw (GroupNotFoundException);

  /**
   * @brief Get the name of a Group. If the name is an empty string it just returns the group_id.
   *
   * @throw GroupNotFoundException if the group_id is not found.
   */
  std::string getGroupName(size_t group_id) const throw (GroupNotFoundException);

  /**
   * @brief Set the name of a Group.
   *
   * NOTE(review): this setter is declared const; confirm that this is intended.
   *
   * @throw GroupNotFoundException if the group_id is not found.
   */
  void setGroupName(size_t group_id, const std::string& group_name) const throw (GroupNotFoundException);

  /**
   * @brief Get a group by position.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   */
  const Group& getGroupAtPosition(size_t group_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Delete a Group from the DataSet.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   */
  void deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the number of Groups.
   */
  size_t getNumberOfGroups() const;

  /**
   * @brief Merge two groups.
   *
   * This method merges two groups. The source group is emptied into the target
   * and then is deleted.
   *
   * @throw GroupNotFoundException as declared by the exception specification
   *        (presumably if source_id or target_id is not found -- confirm).
   */
  void mergeTwoGroups(size_t source_id, size_t target_id) throw (GroupNotFoundException);

  /**
   * @brief Merge some Groups in one.
   *
   * Merge all the groups which are specified in the first one (smallest
   * identifier). When a group is merged to the first, it is deleted from the
   * DataSet.
   *
   * @param group_ids A vector of size_t listing the ids of the groups to merge.
   * @throw GroupNotFoundException as declared by the exception specification
   *        (NOTE(review): the original comment named IndexOutOfBoundsException
   *        "if one of the int in groups exceeds the number of groups", which
   *        the exception specification does not allow -- confirm against the
   *        implementation).
   */
  void mergeGroups(std::vector& group_ids) throw (GroupNotFoundException);

  /**
   * @brief Split a group in two.
   *
   * @param group_id The identifier of the source group.
   * @param individuals_selection The positions of the Individuals to extract from the group to make the new group.
   * @throw GroupNotFoundException if the group_id is not found.
   * @throw IndexOutOfBoundsException if one position of the selection exceeds the number of individuals of the group.
   */
  void splitGroup(size_t group_id, std::vector individuals_selection) throw (Exception);

  // ** Individuals manipulation ************************************************/
  /**
   * @brief Add an Individual to a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw BadIdentifierException if the individual's id is already in use.
   */
  void addIndividualToGroup(size_t group_position, const Individual& individual) throw (Exception);

  /**
   * @brief Add an empty Individual to a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw BadIdentifierException if the individual's id is already in use.
   */
  void addEmptyIndividualToGroup(size_t group_position, const std::string& individual_id) throw (Exception);

  /**
   * @brief Get the number of Individuals in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   */
  size_t getNumberOfIndividualsInGroup(size_t group_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get the position of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndividualNotFoundException if individual_id is not found.
   */
  size_t getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const throw (Exception);

  /**
   * @brief Get an Individual from a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  const Individual* getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get an Individual from a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndividualNotFoundException if individual_id is not found.
   */
  const Individual* getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const throw (Exception);

  /**
   * @brief Delete an Individual from a group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Delete an Individual from a group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndividualNotFoundException if individual_id is not found.
   */
  void deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) throw (Exception);

  /**
   * @brief Set the sex of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the sex of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Set the Date of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the Date of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no date.
   */
  const Date* getIndividualDateInGroup(size_t group_position, size_t individual_position) const throw (Exception);

  /**
   * @brief Set the coordinates of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D& coord) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the coordinates of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no coordinates.
   */
  const Point2D* getIndividualCoordInGroup(size_t group_position, size_t individual_position) const throw (Exception);

  /**
   * @brief Set the Locality of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw LocalityNotFoundException if locality_name is not found.
   */
  void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name) throw (Exception);

  /**
   * @brief Get the Locality of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no locality.
   */
  const Locality* getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const throw (Exception);

  /**
   * @brief Add a Sequence to an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
   * @throw BadIdentifierException if the sequence's name is already in use.
   *
   * NOTE(review): the "BadIntegerException if sequence_position is already in
   * use" entry found on getIndividualSequenceByNameInGroup() presumably
   * belongs here, since this is the method taking a sequence_position to
   * insert at -- confirm against the implementation.
   */
  void addIndividualSequenceInGroup(size_t group_position, size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception);

  /**
   * @brief Get a Sequence from an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   * @throw SequenceNotFoundException if sequence_name is not found.
   * @throw BadIntegerException if sequence_position is already in use
   *        (NOTE(review): this method has no sequence_position parameter; the
   *        entry looks misplaced -- confirm).
   */
  const Sequence& getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception);

  /**
   * @brief Get a Sequence from an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   * @throw SequenceNotFoundException if sequence_position is not found.
   */
  const Sequence& getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const throw (Exception);

  /**
   * @brief Delete a Sequence of an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   * @throw SequenceNotFoundException if sequence_name is not found.
   */
  void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) throw (Exception);

  /**
   * @brief Delete a Sequence of an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   * @throw SequenceNotFoundException if sequence_position is not found.
   */
  void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) throw (Exception);

  /**
   * @brief Get the Sequences' names from an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   */
  std::vector getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const throw (Exception);

  /**
   * @brief Get the position of a Sequence in an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   * @throw SequenceNotFoundException if sequence_name is not found.
   */
  size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception);

  /**
   * @brief Get the number of Sequences in an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no sequences.
   */
  size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const throw (Exception);

  /**
   * @brief Set the MultilocusGenotype of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype) throw (Exception);

  /**
   * @brief Initialize the genotype of an Individual in a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw BadIntegerException if the number of loci is < 1;
   * @throw NullPointerException if analyzed_loci is NULL.
   * @throw Exception if the individual already has a genotype.
   */
  void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (Exception);

  /**
   * @brief Delete the MultilocusGenotype of an Individual from a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   */
  void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Set a MonolocusGenotype of an Individual from a group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   */
  void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype of an Individual from a group, from allele keys.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw Exception if the ploidy doesn't match.
   */
  void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector allele_keys) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype of an Individual from a group, from allele ids.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw Exception if there is no key in allele_keys
   *        (NOTE(review): allele_keys is a parameter of the ByAlleleKey
   *        variant, not of this method; this entry may be misplaced -- confirm).
   */
  void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector allele_id) throw (Exception);

  /**
   * @brief Get a MonolocusGenotype from an Individual of a Group.
   *
   * @throw IndexOutOfBoundsException if group_position exceeds the number of groups.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals in the group.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw AlleleNotFoundException if at least one of the ids is not found
   *        (NOTE(review): this getter takes no allele id; this entry may
   *        belong to the ByAlleleId setter above -- confirm).
   */
  const MonolocusGenotype* getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const throw (Exception);

  // ** AnalyzedSequences manipulation ******************************************/
  /**
   * @brief Set the alphabet of the AnalyzedSequences.
   */
  void setAlphabet(const Alphabet* alpha);

  /**
   * @brief Set the alphabet of the AnalyzedSequences by its type.
   */
  void setAlphabet(const std::string& alpha_type);

  /**
   * @brief Get the alphabet if there is sequence data.
   *
   * @throw NullPointerException if there is no sequence data.
   */
  const Alphabet* getAlphabet() const throw (NullPointerException);

  /**
   * @brief Get the alphabet type as a string.
   *
   * @throw NullPointerException if there is no sequence data.
   */
  std::string getAlphabetType() const throw (NullPointerException);

  // ** AnalyzedLoci manipulation ***********************************************/
  /**
   * @brief Set the AnalyzedLoci to the DataSet.
   *
   * @throw Exception if at least one Individual has a genotype referring to the actual AnalyzedLoci.
   */
  void setAnalyzedLoci(const AnalyzedLoci& analyzedLoci) throw (Exception);

  /**
   * @brief Initialize the AnalyzedLoci for number of loci.
   *
   * @throw Exception if the AnalyzedLoci has already been initialized.
   */
  void initAnalyzedLoci(size_t number_of_loci) throw (Exception);

  /**
   * @brief Get the AnalyzedLoci if there is one.
   *
   * @throw NullPointerException if there is no AnalyzedLoci.
   */
  const AnalyzedLoci* getAnalyzedLoci() const throw (NullPointerException);

  /**
   * @brief Delete the AnalyzedLoci.
   */
  void deleteAnalyzedLoci();

  /**
   * @brief Set a LocusInfo.
   *
   * @throw NullPointerException if there is no AnalyzedLoci to setup.
   * @throw IndexOutOfBoundsException if locus_position exceeds the total of LocusInfo of the DataSet.
   */
  void setLocusInfo(size_t locus_position, const LocusInfo& locus) throw (Exception);

  /**
   * @brief Get a LocusInfo by its name.
   */
  const LocusInfo& getLocusInfoByName(const std::string& locus_name) const throw (Exception);

  /**
   * @brief Get a LocusInfo by its position.
   */
  const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const throw (Exception);

  /**
   * @brief Add an AlleleInfo to a LocusInfo designated by its name.
   */
  void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele) throw (Exception);

  /**
   * @brief Add an AlleleInfo to a LocusInfo designated by its position.
   */
  void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception);

  /**
   * @brief Get the number of loci.
   */
  size_t getNumberOfLoci() const throw (NullPointerException);

  /**
   * @brief Get the ploidy of a locus designated by its name.
   */
  size_t getPloidyByLocusName(const std::string& locus_name) const throw (Exception);

  /**
   * @brief Get the ploidy of a locus designated by its position.
   */
  size_t getPloidyByLocusPosition(size_t locus_position) const throw (Exception);

  // ** Container extraction ***************************************************/
  /**
   * @brief Get a PolymorphismMultiGContainer with all allelic data of the DataSet.
   */
  PolymorphismMultiGContainer* getPolymorphismMultiGContainer() const;

  /**
   * @brief Get a PolymorphismMultiGContainer from a selection of groups and individuals.
   *
   * @param selection A map with group ids as keys and vectors of individual positions in each group as values.
   */
  PolymorphismMultiGContainer* getPolymorphismMultiGContainer(const std::map >& selection) const throw (Exception);

  /**
   * @brief Get a PolymorphismSequenceContainer from a selection of groups and individuals.
   *
   * All the sequences are ingroup. You may change their state after creating the container.
   * @param selection A map with group ids as keys and vectors of individual positions in each group as values.
   * @param sequence_position The position of the sequence in the individuals.
   */
  PolymorphismSequenceContainer* getPolymorphismSequenceContainer(const std::map >& selection, size_t sequence_position) const throw (Exception);

  // ** General tests **********************************************************/
  /**
   * @brief Tell if at least one individual has at least one sequence.
   */
  bool hasSequenceData() const;

  /**
   * @brief Tell if there is allelic data.
   */
  bool hasAlleleicData() const;
};
} // end of namespace bpp;

#endif // _DATASET_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.h000644 000000 000000 00000006353 12147656633 024075 0ustar00rootroot000000 000000 // // File MonoAlleleMonolocusGenotype.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _MONOALLELEMONOLOCUSGENOTYPE_H_
#define _MONOALLELEMONOLOCUSGENOTYPE_H_

// NOTE(review): the target of the following #include and the template
// arguments of std::vector further down are missing in this copy of the file
// (angle-bracket content appears to have been stripped); restore them from
// the upstream source.
#include

// From local
#include "MonolocusGenotype.h"

namespace bpp
{
/**
 * @brief The MonoAlleleMonolocusGenotype class.
 *
 * A MonolocusGenotype that stores a single allele index.
 *
 * @author Sylvain Gaillard
 */
class MonoAlleleMonolocusGenotype :
  public MonolocusGenotype
{
private:
  // Index of the single allele held by this genotype.
  size_t allele_index_;

public:
  // Constructors and destructor
  /**
   * @brief Build a monolocus genotype containing one allele.
   *
   * @param allele_index The index of the allele.
   */
  MonoAlleleMonolocusGenotype(size_t allele_index);

  /**
   * @brief Build a monolocus genotype containing one allele.
   *
   * @param allele_index A vector holding the allele index.
   * @throw BadSizeException presumably when the vector does not hold exactly
   *        one element - TODO confirm against the implementation.
   */
  MonoAlleleMonolocusGenotype(std::vector allele_index) throw (BadSizeException);

  /**
   * @brief Copy constructor.
   */
  MonoAlleleMonolocusGenotype(const MonoAlleleMonolocusGenotype& mmg);

  /**
   * @brief Destroy the MonoAlleleMonolocusGenotype.
   */
  ~MonoAlleleMonolocusGenotype();

public:
  // Other methods
  /**
   * @brief The assignment operator.
   */
  MonoAlleleMonolocusGenotype& operator=(const MonoAlleleMonolocusGenotype& mmg);

  /**
   * @brief The == operator.
   */
  virtual bool operator==(const MonoAlleleMonolocusGenotype& mmg) const;

  /**
   * @name The MonolocusGenotype interface:
   *
   * @{
   */
  std::vector getAlleleIndex() const;
  /** @} */

  /**
   * @name The Clonable interface:
   *
   * @{
   */
  MonoAlleleMonolocusGenotype* clone() const;
  /** @} */
};
} // end of namespace bpp;

#endif // _MONOALLELEMONOLOCUSGENOTYPE_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/BiAlleleMonolocusGenotype.h000644 000000 000000 00000007120 12147656633 023510 0ustar00rootroot000000 000000
//
// File BiAlleleMonolocusGenotype.h
// Author : Sylvain Gaillard
// Last modification : Thursday July 29 2004
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for population genetics analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

// Include guard
#ifndef _BIALLELEMONOLOCUSGENOTYPE_H_
#define _BIALLELEMONOLOCUSGENOTYPE_H_

// From STL
// NOTE(review): the targets of the two following #include directives and the
// template arguments of std::vector further down are missing in this copy of
// the file (angle-bracket content appears to have been stripped); restore
// them from the upstream source.
#include
#include

// From local
#include "MonolocusGenotype.h"

namespace bpp
{
/**
 * @brief The BiAlleleMonolocusGenotype class.
 *
 * A MonolocusGenotype that stores the indexes of two alleles.
 *
 * @author Sylvain Gaillard
 */
class BiAlleleMonolocusGenotype :
  public MonolocusGenotype
{
private:
  // Indexes of the alleles held by this genotype.
  std::vector allele_index_;

public:
  // Constructors and destructor
  /**
   * @brief Build a monolocus genotype containing two alleles.
   *
   * @param first_allele_index The index of the first allele.
   * @param second_allele_index The index of the second allele.
   */
  BiAlleleMonolocusGenotype(size_t first_allele_index,
                            size_t second_allele_index);

  /**
   * @brief Build a monolocus genotype containing two alleles.
   *
   * @param allele_index A vector holding the allele indexes.
   * @throw BadSizeException presumably when the vector does not hold exactly
   *        two elements - TODO confirm against the implementation.
   */
  BiAlleleMonolocusGenotype(std::vector allele_index) throw (BadSizeException);

  /**
   * @brief Copy constructor.
   */
  BiAlleleMonolocusGenotype(const BiAlleleMonolocusGenotype& bmg);

  /**
   * @brief Destroy the BiAlleleMonolocusGenotype.
   */
  ~BiAlleleMonolocusGenotype();

public:
  // Other methods
  /**
   * @brief The assignment operator.
   */
  BiAlleleMonolocusGenotype& operator=(const BiAlleleMonolocusGenotype& bmg);

  /**
   * @brief The == operator.
   */
  bool operator==(const BiAlleleMonolocusGenotype& bmg) const;

  /**
   * @brief Get the first allele index.
   */
  size_t getFirstAlleleIndex() const;

  /**
   * @brief Get the second allele index.
   */
  size_t getSecondAlleleIndex() const;

  /**
   * @brief Test the homozygosity of the locus.
   */
  bool isHomozygous() const;

  /**
   * @name The MonolocusGenotype interface:
   *
   * @{
   */
  std::vector getAlleleIndex() const;
  /** @} */

  /**
   * @name The Clonable interface:
   *
   * @{
   */
  BiAlleleMonolocusGenotype* clone() const;
  /** @} */
};
} // end of namespace bpp;

#endif // _BIALLELEMONOLOCUSGENOTYPE_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/GeneralExceptions.cpp000644 000000 000000 00000032366 12147656633 022411 0ustar00rootroot000000 000000
//
// File GeneralExceptions.cpp
// Author : Sylvain Gaillard
// Last modification: Thursday July 29 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for population genetics analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "GeneralExceptions.h" #include using namespace bpp; using namespace std; // ** BadIdentifierException **************************************************/ BadIdentifierException::BadIdentifierException(const char* text, const size_t id) : Exception("BadIdentifierException: " + string(text) + "(" + TextTools::toString(id) + ")"), id_(TextTools::toString(id)) {} BadIdentifierException::BadIdentifierException(const std::string& text, const size_t id) : Exception("BadIdentifierException: " + text + "(" + TextTools::toString(id) + ")"), id_(TextTools::toString(id)) {} BadIdentifierException::BadIdentifierException(const char* text, const std::string& id) : Exception("BadIdentifierException: " + string(text) + "(" + id + ")"), id_(id) {} BadIdentifierException::BadIdentifierException(const std::string& text, const std::string& id) : Exception("BadIdentifierException: " + text + "(" + id + ")"), id_(id) {} BadIdentifierException::~BadIdentifierException() throw () {} const std::string BadIdentifierException::getIdentifier() const { return id_; } // ** LocusNotFoundException **************************************************/ LocusNotFoundException::LocusNotFoundException(const char* text, const size_t id) : BadIdentifierException("LocusNotFoundException: " + string(text) + "(" + TextTools::toString(id) + ")", id) {} LocusNotFoundException::LocusNotFoundException(const std::string& text, const size_t id) : BadIdentifierException("LocusNotFoundException: " + text + "(" + TextTools::toString(id) + ")", id) {} LocusNotFoundException::LocusNotFoundException(const char* 
text, const std::string& id) : BadIdentifierException("LocusNotFoundException: " + string(text) + "(" + id + ")", id) {} LocusNotFoundException::LocusNotFoundException(const std::string& text, const std::string& id) : BadIdentifierException("LocusNotFoundException: " + text + "(" + id + ")", id) {} LocusNotFoundException::~LocusNotFoundException() throw () {} const std::string LocusNotFoundException::getIdentifier() const { return BadIdentifierException::getIdentifier(); } // ** AlleleNotFoundException **************************************************/ AlleleNotFoundException::AlleleNotFoundException(const char* text, const size_t id) : BadIdentifierException("AlleleNotFoundException: " + string(text) + "(" + TextTools::toString(id) + ")", id) {} AlleleNotFoundException::AlleleNotFoundException(const std::string& text, const size_t id) : BadIdentifierException("AlleleNotFoundException: " + text + "(" + TextTools::toString(id) + ")", id) {} AlleleNotFoundException::AlleleNotFoundException(const char* text, const std::string& id) : BadIdentifierException("AlleleNotFoundException: " + string(text) + "(" + id + ")", id) {} AlleleNotFoundException::AlleleNotFoundException(const std::string& text, const std::string& id) : BadIdentifierException("AlleleNotFoundException: " + text + "(" + id + ")", id) {} AlleleNotFoundException::~AlleleNotFoundException() throw () {} const std::string AlleleNotFoundException::getIdentifier() const { return BadIdentifierException::getIdentifier(); } // ** LocalityNotFoundException **************************************************/ LocalityNotFoundException::LocalityNotFoundException(const char* text, const size_t id) : BadIdentifierException("LocalityNotFoundException: " + string(text) + "(" + TextTools::toString(id) + ")", id) {} LocalityNotFoundException::LocalityNotFoundException(const std::string& text, const size_t id) : BadIdentifierException("LocalityNotFoundException: " + text + "(" + TextTools::toString(id) + ")", id) {} 
LocalityNotFoundException::LocalityNotFoundException(const char* text, const std::string& id) : BadIdentifierException("LocalityNotFoundException: " + string(text) + "(" + id + ")", id) {} LocalityNotFoundException::LocalityNotFoundException(const std::string& text, const std::string& id) : BadIdentifierException("LocalityNotFoundException: " + text + "(" + id + ")", id) {} LocalityNotFoundException::~LocalityNotFoundException() throw () {} const std::string LocalityNotFoundException::getIdentifier() const { return BadIdentifierException::getIdentifier(); } // ** IndividualNotFoundException **************************************************/ IndividualNotFoundException::IndividualNotFoundException(const char* text, const size_t id) : BadIdentifierException("IndividualNotFoundException: " + string(text) + "(" + TextTools::toString(id) + ")", id) {} IndividualNotFoundException::IndividualNotFoundException(const std::string& text, const size_t id) : BadIdentifierException("IndividualNotFoundException: " + text + "(" + TextTools::toString(id) + ")", id) {} IndividualNotFoundException::IndividualNotFoundException(const char* text, const std::string& id) : BadIdentifierException("IndividualNotFoundException: " + string(text) + "(" + id + ")", id) {} IndividualNotFoundException::IndividualNotFoundException(const std::string& text, const std::string& id) : BadIdentifierException("IndividualNotFoundException: " + text + "(" + id + ")", id) {} IndividualNotFoundException::~IndividualNotFoundException() throw () {} const std::string IndividualNotFoundException::getIdentifier() const { return BadIdentifierException::getIdentifier(); } // ** GroupNotFoundException **************************************************/ GroupNotFoundException::GroupNotFoundException(const char* text, const size_t id) : BadIdentifierException("GroupNotFoundException: " + string(text) + "(" + TextTools::toString(id) + ")", id) {} GroupNotFoundException::GroupNotFoundException(const std::string& text, 
const size_t id) : BadIdentifierException("GroupNotFoundException: " + text + "(" + TextTools::toString(id) + ")", id) {} GroupNotFoundException::GroupNotFoundException(const char* text, const std::string& id) : BadIdentifierException("GroupNotFoundException: " + string(text) + "(" + id + ")", id) {} GroupNotFoundException::GroupNotFoundException(const std::string& text, const std::string& id) : BadIdentifierException("GroupNotFoundException: " + text + "(" + id + ")", id) {} GroupNotFoundException::~GroupNotFoundException() throw () {} const std::string GroupNotFoundException::getIdentifier() const { return BadIdentifierException::getIdentifier(); } bpp-popgen-2.1.0/src/Bpp/PopGen/AbstractIDataSet.h000644 000000 000000 00000004744 12147656633 021560 0ustar00rootroot000000 000000 // // File AbstractIDataSet.h // Author : Sylvain Gaillard // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. CNRS, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _ABSTRACTIDATASET_H_
#define _ABSTRACTIDATASET_H_

#include "IDataSet.h"
// NOTE(review): the target of the following #include is missing in this copy
// of the file (angle-bracket content appears to have been stripped); restore
// it from the upstream source.
#include

namespace bpp
{
/**
 * @brief Partial implementation of the DataSet Input interface
 *
 * Only the stream-based read into an existing DataSet is left pure virtual;
 * the three other overloads are declared non-pure here and are defined
 * outside this header.
 *
 * @author Sylvain Gaillard
 */
class AbstractIDataSet :
  public IDataSet
{
public:
  // Class destructor
  virtual ~AbstractIDataSet();

public:
  /**
   * @name The IDataSet interface.
   * @{
   */
  // Pure virtual: each concrete input format must implement this overload.
  virtual void read(std::istream& is, DataSet& data_set) throw (Exception) = 0;
  // NOTE(review): the overloads below presumably delegate to the stream-based
  // overload above - confirm against the implementation file.
  virtual void read(const std::string& path, DataSet& data_set) throw (Exception);
  virtual DataSet* read(std::istream& is) throw (Exception);
  virtual DataSet* read(const std::string& path) throw (Exception);
  /**
   * @}
   */
};
} // end of namespace bpp;

#endif // _ABSTRACTIDATASET_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/DarwinVarSingle.h000644 000000 000000 00000005643 12147656633 021474 0ustar00rootroot000000 000000
//
// File DarwinVarSingle.h
// Author : Sylvain Gaillard
// Last modification : April 7, 2008
//

/*
Copyright or © or Copr. Bio++ Development Team, (April 7, 2008)

This software is a computer program whose purpose is to provide classes for population genetics analysis.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _DARWIN_VAR_SINGLE_H_
#define _DARWIN_VAR_SINGLE_H_

// NOTE(review): the targets of the four following #include directives are
// missing in this copy of the file (angle-bracket content appears to have
// been stripped); restore them from the upstream source.
#include
#include
#include
#include

// From local Pop
#include "AbstractODataSet.h"

namespace bpp
{
/**
 * @brief The Darwin .var (single data) output format for popgenlib.
 *
 * @author Sylvain Gaillard
 */
class DarwinVarSingle :
  public virtual AbstractODataSet
{
private:
  // Value given at construction (999 by default); presumably the code written
  // for missing data - TODO confirm against the implementation.
  size_t missingData_;

public:
  // Constructor and destructor
  DarwinVarSingle(size_t missingData = 999);
  ~DarwinVarSingle();

public:
  /**
   * @name The ODataSet interface.
   * @{
   */
  void write(std::ostream& os, const DataSet& data_set) const throw (Exception);
  void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception);
  /**
   * @}
   */

  /**
   * @name The IOFormat interface
   * @{
   */
  virtual const std::string getFormatName() const
  {
    return "Darwin .var single data";
  }
  virtual const std::string getFormatDescription() const
  {
    return "Darwin .var file store data for each marker in each individual (1 variable per allele).";
  }
  /**
   * @}
   */
};
} // end of namespace bpp;

#endif // _DARWIN_VAR_SINGLE_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/MultilocusGenotypeStatistics.h000644 000000 000000 00000032677 12147656633 024372 0ustar00rootroot000000 000000
//
// File MultilocusGenotypeStatistics.h
// Authors : Sylvain Gaillard
// Khalid Belkhir
// Last modification : Wednesday August 04 2004
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for population genetics analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.

The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _MULTILOCUSGENOTYPESTATISTICS_H_
#define _MULTILOCUSGENOTYPESTATISTICS_H_

// NOTE(review): the targets of the bare #include directives below and the
// template arguments of std::vector, std::set, std::map and std::auto_ptr in
// the declarations are missing in this copy of the file (angle-bracket
// content appears to have been stripped); restore them from the upstream
// source.

// From STL
#include
#include
#include
#include
#include
#include

// From SeqLib
#include

// From popgenlib
#include "PolymorphismMultiGContainer.h"
#include "MultilocusGenotype.h"
#include "GeneralExceptions.h"

namespace bpp
{
/**
 * @brief The MultilocusGenotypeStatistics class
 *
 * This class is a set of static methods for PolymorphismMultiGContainer.
 *
 * @author Sylvain Gaillard
 */
class MultilocusGenotypeStatistics
{
public:
  // Variance components a, b and c (see getVarianceComponents).
  struct VarComp
  {
    double a;
    double b;
    double c;
  };

  // The three F statistics (see getAllelesFstats).
  struct Fstats
  {
    double Fit;
    double Fst;
    double Fis;
  };

  // Result of a permutation test: the statistic, the % of permuted values
  // above it and the % of permuted values below it
  // (see getWCMultilocusFstAndPerm and getWCMultilocusFisAndPerm).
  struct PermResults
  {
    double Statistic;
    double Percent_sup;
    double Percent_inf;
  };

  /**
   * @brief Get the alleles' id at one locus for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static std::vector getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Count the number of alleles (gametes) at a locus for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static size_t countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Get a map of allele count for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static std::map getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the alleles frequencies at one locus for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static std::map getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Count the number of non-missing data at a given locus for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static size_t countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   */
  static std::map countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the heterozygous frequencies for each allele at a locus in a set of groups.
   *
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static std::map getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the observed heterozygosity for one locus.
   *
   * This is the mean value of the getHeterozygousFrqForGroups map.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static double getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the expected heterozygosity for one locus.
   *
   * Nei 1977
   * @f[
   * H_{exp}=1-\sum_{i=1}^{n}x_i^2
   * @f]
   * where @f$x_i@f$ is the frequency of the ith allele and @f$n@f$ the number of alleles.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static double getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the expected non biased heterozygosity for one locus.
   *
   * Nei 1978
   * @f[
   * H_{nb}=\frac{2n}{2n-1}\left(1-\sum_{i=1}^{n}x_i^2\right)=\frac{2n}{2n-1}H_{exp}
   * @f]
   * where @f$x_i@f$ is the frequency of the ith allele and @f$n@f$ the number of alleles.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static double getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Nei distance between two groups over a given set of loci.
   *
   * Nei 1972
   * @f[
   * \hat{D}_1=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)}
   * {\sqrt{\displaystyle\sum_{i=1}^{n}x_i^2\times \displaystyle\sum_{i=1}^{n}y_i^2}}\right]
   * @f]
   * where @f$x_i@f$ and @f$y_i@f$ are respectively the ith allele's frequency of the first and second group
   * and @f$n@f$ the total number of alleles of both groups.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static double getDnei72(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, size_t grp1, size_t grp2) throw (Exception);

  /**
   * @brief Compute the Nei unbiased distance between two groups at a given number of loci.
   *
   * Nei 1978
   * @f[
   * \hat{D}=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)}
   * {\sqrt{\frac{2n_XJ_X-1}{2n_X-1}\times\frac{2n_YJ_Y-1}{2n_Y-1}}}
   * \right]
   * @f]
   * where @f$x_i@f$ and @f$y_i@f$ are respectively the ith allele's frequency of the first and second group,
   * @f$n@f$ the total number of alleles of both groups, @f$n_X@f$ and @f$n_Y@f$ the number of alleles in the first and second group
   * and
   * @f[
   * J_X=\sum_{i=1}^{n}x_i^2
   * \qquad\textrm{and}\qquad
   * J_Y=\sum_{i=1}^{n}y_i^2
   * @f]
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
   * @throw ZeroDivisionException if the number of considered alleles = 0.
   */
  static double getDnei78(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, size_t grp1, size_t grp2) throw (Exception);

  /**
   * @brief Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
   */
  static std::map getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
   */
  static std::map getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham @f$\theta@f$ on a set of groups for each allele of a given locus.
   */
  static std::map getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
   */
  static std::map getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (Exception);

  /**
   * @brief Get the variance components a, b and c (Weir and Cockerham, 1983).
   */
  static std::map getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set& groups) throw (ZeroDivisionException);

  /**
   * @brief Compute the Weir and Cockerham @f$\theta_{wc}@f$ on a set of groups for a given set of loci.
   * The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.
   */
  static double getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci.
   * The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.
   */
  static double getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, const std::set& groups) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham @f$\theta_{wc}@f$ on a set of groups for a given set of loci and make a permutation test.
   * Multilocus @f$\theta@f$ is calculated as in getWCMultilocusFst on the original data set and on nb_perm data sets obtained after
   * a permutation of individuals between the different groups.
   * Return values are theta, % of values > theta and % of values < theta.
   */
  static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, std::set groups, int nb_perm) throw (Exception);

  /**
   * @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation test.
   * Multilocus Fis is calculated as in getWCMultilocusFis on the original data set and on nb_perm data sets obtained after
   * a permutation of alleles between individuals of each group.
   * Return values are Fis, % of values > Fis and % of values < Fis.
   */
  static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, std::set groups, int nb_perm) throw (Exception);

  /**
   * @brief Compute the @f$\theta_{RH}@f$ on a set of groups for a given set of loci.
   * The variance components for each allele are calculated and then combined over loci using RH weighting with alleles frequency.
   */
  static double getRHMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, const std::set& groups) throw (Exception);

  /**
   * @brief Compute pairwise distances on a set of groups for a given set of loci.
   * distance is either Nei72, Nei78, Fst W&C or Fst Robertson & Hill, Nm,
   * D=-ln(1-Fst) of Reynolds et al. 1983, Rousset 1997 Fst/(1-Fst)
   */
  static std::auto_ptr getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, std::vector locus_positions, const std::set& groups, std::string distance_methode) throw (Exception);
};
} // end of namespace bpp;

#endif // _MULTILOCUSGENOTYPESTATISTICS_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp000644 000000 000000 00000044565 12147656633 026055 0ustar00rootroot000000 000000
//
// File: PolymorphismSequenceContainerTools.cpp
// Authors: Eric Bazin
// Sylvain Gaillard
// Created on: Thursday July 29 2004
//

/*
Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes for population genetics analysis.

This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.

In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge.
Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "PolymorphismSequenceContainerTools.h" using namespace bpp; using namespace std; PolymorphismSequenceContainerTools::~PolymorphismSequenceContainerTools() {} /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::read(const std::string& path, const Alphabet* alpha) throw (Exception) { Mase ms; string key; const OrderedSequenceContainer* seqc = 0; try { seqc = dynamic_cast(ms.readSequences(path, alpha )); } catch (Exception& e) { if (seqc != 0) delete seqc; throw e; } PolymorphismSequenceContainer* psc = new PolymorphismSequenceContainer(*seqc); Comments maseFileHeader = seqc->getGeneralComments(); delete seqc; map groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader); for (map::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++) { key = mi->first; if (key.compare(0, 8, "OUTGROUP") == 0) { SequenceSelection ss; try { ss = MaseTools::getSequenceSet(maseFileHeader, key); } catch (IOException& ioe) { delete psc; throw ioe; } for (size_t i = 0; i != ss.size(); i++) { try { psc->setAsOutgroupMember(ss[i]); } catch (SequenceNotFoundException& snfe) { delete psc; throw snfe; } } } } return psc; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractIngroup (const PolymorphismSequenceContainer& psc) throw (Exception) { SequenceSelection ss; PolymorphismSequenceContainer* psci = dynamic_cast(psc.clone()); for (size_t i = 0; i < 
psc.getNumberOfSequences(); i++) { if (!psc.isIngroupMember(i)) ss.push_back(i); } if (ss.size() == psc.getNumberOfSequences()) { delete psci; throw Exception("PolymorphismSequenceContainerTools::extractIngroup: no Ingroup sequences found."); } for (size_t i = ss.size(); i > 0; --i) { psci->deleteSequence(ss[i - 1]); } return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractOutgroup(const PolymorphismSequenceContainer& psc) throw (Exception) { SequenceSelection ss; PolymorphismSequenceContainer* psci = dynamic_cast(psc.clone()); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i) ) ss.push_back(i); } if (ss.size() == psc.getNumberOfSequences()) { delete psci; throw Exception("PolymorphismSequenceContainerTools::extractOutgroup: no Outgroup sequences found."); } for (size_t i = ss.size(); i > 0; i--) { psci->deleteSequence(ss[i - 1]); } return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractGroup(const PolymorphismSequenceContainer& psc, size_t group_id) throw (Exception) { SequenceSelection ss; PolymorphismSequenceContainer* psci = dynamic_cast(psc.clone()); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.getGroupId(i) != group_id) ss.push_back(i); } if (ss.size() == psc.getNumberOfSequences()) { delete psci; throw GroupNotFoundException("PolymorphismSequenceContainerTools::extractGroup: group_id not found.", group_id); } for (size_t i = ss.size(); i > 0; i--) { psci->deleteSequence(ss[i - 1]); } return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSelectedSequences(const PolymorphismSequenceContainer& psc, const SequenceSelection& ss) { PolymorphismSequenceContainer* newpsc = 
new PolymorphismSequenceContainer(psc.getAlphabet()); for (size_t i = 0; i < ss.size(); i++) { newpsc->addSequence(psc.getSequence(ss[i]), psc.getSequenceCount(i), false); if (psc.isIngroupMember(i)) newpsc->setAsIngroupMember(i); else { newpsc->setAsOutgroupMember(i); newpsc->setGroupId(i, psc.getGroupId(i)); } } newpsc->setGeneralComments(psc.getGeneralComments()); return newpsc; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::sample(const PolymorphismSequenceContainer& psc, size_t n, bool replace) { size_t nbSeq = psc.getNumberOfSequences(); vector v; for (size_t i = 0; i < nbSeq; ++i) { v.push_back(i); } vector vv(n); RandomTools::getSample(v, vv, replace); PolymorphismSequenceContainer* newpsc = PolymorphismSequenceContainerTools::getSelectedSequences(psc, vv); return newpsc; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSitesWithoutGaps (const PolymorphismSequenceContainer& psc) { vector seqNames = psc.getSequencesNames(); PolymorphismSequenceContainer* noGapCont = new PolymorphismSequenceContainer(psc.getNumberOfSequences(), psc.getAlphabet()); noGapCont->setSequencesNames(seqNames, false); size_t nbSeq = psc.getNumberOfSequences(); for (size_t i = 0; i < nbSeq; i++) { noGapCont->setSequenceCount(i, psc.getSequenceCount(i)); if (psc.isIngroupMember(i)) noGapCont->setAsIngroupMember(i); else { noGapCont->setAsOutgroupMember(i); noGapCont->setGroupId(i, psc.getGroupId(i)); } } NoGapSiteContainerIterator ngsi(psc); while (ngsi.hasMoreSites()) noGapCont->addSite(*ngsi.nextSite()); return noGapCont; } /******************************************************************************/ size_t PolymorphismSequenceContainerTools::getNumberOfNonGapSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception) { size_t count = psc.getNumberOfSites(); 
PolymorphismSequenceContainer* npsc = 0; SimpleSiteContainerIterator* ssi; if (ingroup) { try { npsc = extractIngroup(psc); } catch (Exception& e) { if (npsc != NULL) delete npsc; throw e; } ssi = new SimpleSiteContainerIterator(*npsc); } else ssi = new SimpleSiteContainerIterator(psc); while (ssi->hasMoreSites()) if (SiteTools::hasGap(*ssi->nextSite())) count--; delete ssi; return count; } /******************************************************************************/ size_t PolymorphismSequenceContainerTools::getNumberOfCompleteSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception) { size_t count = psc.getNumberOfSites(); PolymorphismSequenceContainer* npsc = 0; SimpleSiteContainerIterator* ssi; if (ingroup) { try { npsc = extractIngroup(psc); } catch (Exception& e) { if (npsc != NULL) delete npsc; throw e; } ssi = new SimpleSiteContainerIterator(*npsc); } else ssi = new SimpleSiteContainerIterator(psc); while (ssi->hasMoreSites()) if (!SiteTools::isComplete(*ssi->nextSite())) count--; delete ssi; return count; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getCompleteSites (const PolymorphismSequenceContainer& psc) { vector seqNames = psc.getSequencesNames(); PolymorphismSequenceContainer* complete = new PolymorphismSequenceContainer(psc.getNumberOfSequences(), psc.getAlphabet()); complete->setSequencesNames(seqNames, false); size_t nbSeq = psc.getNumberOfSequences(); for (size_t i = 0; i < nbSeq; i++) { complete->setSequenceCount(i, psc.getSequenceCount(i)); if (psc.isIngroupMember(i)) complete->setAsIngroupMember(i); else { complete->setAsOutgroupMember(i); complete->setGroupId(i, psc.getGroupId(i)); } } CompleteSiteContainerIterator csi(psc); while (csi.hasMoreSites()) complete->addSite(*csi.nextSite()); return complete; } /******************************************************************************/ PolymorphismSequenceContainer* 
PolymorphismSequenceContainerTools::excludeFlankingGap(const PolymorphismSequenceContainer& psc) { PolymorphismSequenceContainer* psci = dynamic_cast(psc.clone()); while (SiteTools::hasGap(psci->getSite(0))) psci->deleteSite(0); size_t i = 0; size_t n = psci->getNumberOfSites(); while (SiteTools::hasGap(psci->getSite(n - i - 1))) { psci->deleteSite(n - i - 1); i++; } return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSelectedSites(const PolymorphismSequenceContainer& psc, const std::string& setName, bool phase) { SiteContainer* pscc = MaseTools::getSelectedSites(psc, setName); Comments maseFileHeader = psc.getGeneralComments(); if (phase) { for (size_t i = 1; i < MaseTools::getPhase(maseFileHeader, setName); i++) { pscc->deleteSite(0); } } PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*pscc); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i)) psci->setAsIngroupMember(i); else { psci->setAsOutgroupMember(i); psci->setGroupId(i, psc.getGroupId(i)); } } psci->deleteGeneralComments(); delete pscc; return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getNonCodingSites(const PolymorphismSequenceContainer& psc, const std::string& setName) { SiteSelection ss; Comments maseFileHeader = psc.getGeneralComments(); SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName); for (size_t i = 0; i < psc.getNumberOfSites(); i++) { if (find(codss.begin(), codss.end(), i) == codss.end()) ss.push_back(i); } const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss); PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i)) psci->setAsIngroupMember(i); else { 
psci->setAsOutgroupMember(i); psci->setGroupId(i, psc.getGroupId(i)); } } delete sc; return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getOnePosition(const PolymorphismSequenceContainer& psc, const std::string& setName, size_t pos) { Comments maseFileHeader = psc.getGeneralComments(); size_t start; try { start = MaseTools::getPhase(maseFileHeader, setName); } catch (Exception& e) { start = 1; } SiteSelection ss; size_t i; if ((int)pos - (int)start >= 0) i = pos - start; else i = pos - start + 3; while (i < psc.getNumberOfSites()) { ss.push_back(i); i += 3; } const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss); PolymorphismSequenceContainer* newpsc = new PolymorphismSequenceContainer(*sc); for (size_t j = 0; j < psc.getNumberOfSequences(); j++) { if (psc.isIngroupMember(j)) newpsc->setAsIngroupMember(j); else { newpsc->setAsOutgroupMember(j); newpsc->setGroupId(i, psc.getGroupId(j)); } } delete sc; return newpsc; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca ) { Comments maseFileHeader = psc.getGeneralComments(); SiteSelection ss; SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName); size_t start; try { start = MaseTools::getPhase(maseFileHeader, setName); } catch (Exception& e) { throw e; } size_t first = 0, last = psc.getNumberOfSites(); // Check if the first codon is AUG if (start == 1 && psc.getSite(codss[0]).getValue(0) == 0 && psc.getSite(codss[1]).getValue(0) == 3 && psc.getSite(codss[2]).getValue(0) == 2) first = codss[0]; // Check if the last codon is a STOP one int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0); int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0); int c3 = 
psc.getSite(codss[codss.size() - 1]).getValue(0); if (ca->isStop(ca->getCodon(c1, c2, c3))) last = codss[codss.size() - 1]; // Keep sites between AUG and STOP for (size_t i = first; i < last; i++) { if (find(codss.begin(), codss.end(), i) == codss.end()) { ss.push_back(i); } } const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss); PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i)) psci->setAsIngroupMember(i); else { psci->setAsOutgroupMember(i); psci->setGroupId(i, psc.getGroupId(i)); } } delete sc; return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get5Prime(const PolymorphismSequenceContainer& psc, const std::string& setName) { Comments maseFileHeader = psc.getGeneralComments(); SiteSelection ss; SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName); size_t start = MaseTools::getPhase(maseFileHeader, setName); size_t last = 0; // Check if the first Codon is AUG if (start == 1 && psc.getSite(codss[0]).getValue(0) == 0 && psc.getSite(codss[1]).getValue(0) == 3 && psc.getSite(codss[2]).getValue(0) == 2) last = codss[0]; for (size_t i = 0; i < last; i++) { if (find(codss.begin(), codss.end(), i) == codss.end()) { ss.push_back(i); } } const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss); PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i)) psci->setAsIngroupMember(i); else { psci->setAsOutgroupMember(i); psci->setGroupId(i, psc.getGroupId(i)); } } delete sc; return psci; } /******************************************************************************/ PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get3Prime(const PolymorphismSequenceContainer& psc, const std::string& 
setName, const CodonAlphabet* ca ) { Comments maseFileHeader = psc.getGeneralComments(); SiteSelection ss; SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName); size_t first = psc.getNumberOfSites() - 1; // Check if the last codon is a STOP one int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0); int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0); int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0); if (ca->isStop(ca->getCodon(c1, c2, c3))) first = codss[codss.size() - 1]; for (size_t i = first; i < psc.getNumberOfSites(); i++) { if (find(codss.begin(), codss.end(), i) == codss.end()) { ss.push_back(i); } } const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss); PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc); for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { if (psc.isIngroupMember(i)) psci->setAsIngroupMember(i); else { psci->setAsOutgroupMember(i); psci->setGroupId(i, psc.getGroupId(i)); } } delete sc; return psci; } /******************************************************************************/ string PolymorphismSequenceContainerTools::getIngroupSpeciesName(const PolymorphismSequenceContainer& psc) { string key; string speciesName; Comments maseFileHeader = psc.getGeneralComments(); if (!maseFileHeader.size()) return speciesName; map groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader); for (map::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++) { key = mi->first; if (key.compare(0, 7, "INGROUP") == 0) { StringTokenizer* sptk = new StringTokenizer(key, "_"); speciesName = sptk->getToken(1) + " " + sptk->getToken(2); } } return speciesName; } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/MultiSeqIndividual.cpp000644 000000 000000 00000032400 12147656633 022533 0ustar00rootroot000000 000000 // // File MultiSeqIndividual.cpp // Author : Sylvain Gaillard // Last modification : Tuesday 
August 03 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "MultiSeqIndividual.h" using namespace bpp; using namespace std; // ** Class constructor: *******************************************************/ MultiSeqIndividual::MultiSeqIndividual() : id_(""), sex_(0), date_(0), coord_(0), locality_(0), sequences_(map()), genotype_(0) {} MultiSeqIndividual::MultiSeqIndividual(const std::string& id) : id_(id), sex_(0), date_(0), coord_(0), locality_(0), sequences_(map()), genotype_(0) {} MultiSeqIndividual::MultiSeqIndividual( const std::string& id, const Date& date, const Point2D& coord, Locality* locality, const unsigned short sex) : id_(id), sex_(sex), date_(new Date(date)), coord_(new Point2D(coord)), locality_(locality), sequences_(map()), genotype_(0) {} MultiSeqIndividual::MultiSeqIndividual(const MultiSeqIndividual& ind) : id_(ind.getId()), sex_(ind.getSex()), date_(0), coord_(0), locality_(0), sequences_(map()), genotype_(0) { try { setDate(*ind.getDate()); } catch (NullPointerException) { date_ = 0; } try { setCoord(*ind.getCoord()); } catch (NullPointerException) { coord_ = 0; } try { setLocality(ind.getLocality()); } catch (NullPointerException) { locality_ = 0; } if (ind.hasSequences()) { vector keys = ind.getSequencesKeys(); for (size_t i = 0; i < keys.size(); i++) { sequences_[keys[i]] = new VectorSequenceContainer(*const_cast(ind.getVectorSequenceContainer(keys[i]))); } } genotype_ = ind.hasGenotype() ? 
new MultilocusGenotype(*ind.getGenotype()) : 0; } // ** Class destructor: *******************************************************/ MultiSeqIndividual::~MultiSeqIndividual() { delete date_; delete coord_; } // ** Other methodes: *********************************************************/ MultiSeqIndividual& MultiSeqIndividual::operator=(const MultiSeqIndividual& ind) { setId(ind.getId()); setSex(ind.getSex()); try { setDate(*ind.getDate()); } catch (NullPointerException) { date_ = 0; } try { setCoord(*ind.getCoord()); } catch (NullPointerException) { coord_ = 0; } try { setLocality(ind.getLocality()); } catch (NullPointerException) { locality_ = 0; } if (ind.hasSequences()) { vector keys = ind.getSequencesKeys(); for (size_t i = 0; i < keys.size(); i++) { sequences_[keys[i]] = new VectorSequenceContainer(*const_cast(ind.getVectorSequenceContainer(keys[i]))); } } genotype_ = ind.hasGenotype() ? new MultilocusGenotype(*ind.getGenotype()) : 0; return *this; } /******************************************************************************/ // Id void MultiSeqIndividual::setId(const std::string id) { id_ = id; } /******************************************************************************/ std::string MultiSeqIndividual::getId() const { return id_; } /******************************************************************************/ // Sex void MultiSeqIndividual::setSex(const unsigned short sex) { sex_ = sex; } /******************************************************************************/ unsigned short MultiSeqIndividual::getSex() const { return sex_; } /******************************************************************************/ // Date void MultiSeqIndividual::setDate(const Date& date) { if (!hasDate()) { date_ = new Date(date); } else if (*date_ != date) { delete date_; date_ = new Date(date); } } /******************************************************************************/ const Date* MultiSeqIndividual::getDate() const throw (NullPointerException) { if 
(hasDate()) return new Date(*date_); else throw (NullPointerException("MultiSeqIndividual::getDate: no date associated to this individual.")); } /******************************************************************************/ bool MultiSeqIndividual::hasDate() const { return date_ != 0; } /******************************************************************************/ // Coord void MultiSeqIndividual::setCoord(const Point2D& coord) { if (!hasCoord()) { coord_ = new Point2D(coord); } else if (*coord_ != coord) { delete coord_; coord_ = new Point2D(coord); } } /******************************************************************************/ void MultiSeqIndividual::setCoord(const double x, const double y) { if (!hasCoord()) { coord_ = new Point2D(x, y); } else if (this->getX() != x || this->getY() != y) { delete coord_; coord_ = new Point2D(x, y); } } /******************************************************************************/ const Point2D* MultiSeqIndividual::getCoord() const throw (NullPointerException) { if (hasCoord()) return new Point2D(*coord_); else throw (NullPointerException("MultiSeqIndividual::getCoord: no coord associated to this individual.")); } /******************************************************************************/ bool MultiSeqIndividual::hasCoord() const { return coord_ != 0; } /******************************************************************************/ void MultiSeqIndividual::setX(const double x) throw (NullPointerException) { if (hasCoord()) coord_->setX(x); else throw (NullPointerException("MultiSeqIndividual::setX: no coord associated to this individual.")); } /******************************************************************************/ void MultiSeqIndividual::setY(const double y) throw (NullPointerException) { if (hasCoord()) coord_->setY(y); else throw (NullPointerException("MultiSeqIndividual::setY: no coord associated to this individual.")); } 
/******************************************************************************/ double MultiSeqIndividual::getX() const throw (NullPointerException) { if (hasCoord()) return coord_->getX(); else throw (NullPointerException("MultiSeqIndividual::getX: no coord associated to this individual.")); } /******************************************************************************/ double MultiSeqIndividual::getY() const throw (NullPointerException) { if (hasCoord()) return coord_->getY(); else throw (NullPointerException("MultiSeqIndividual::getY: no coord associated to this individual.")); } /******************************************************************************/ // Locality void MultiSeqIndividual::setLocality(const Locality* locality) { locality_ = locality; } /******************************************************************************/ const Locality* MultiSeqIndividual::getLocality() const throw (NullPointerException) { if (hasLocality()) return locality_; else throw (NullPointerException("MultiSeqIndividual::getLocality: no locality associated to this individual.")); } /******************************************************************************/ bool MultiSeqIndividual::hasLocality() const { return locality_ != 0; } /******************************************************************************/ // Sequences const VectorSequenceContainer* MultiSeqIndividual::getVectorSequenceContainer(const std::string& id) const throw (Exception) { map::const_iterator it; it = sequences_.find(id); // Test existence of id in the map. 
if (it == sequences_.end()) { string mes = "MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."; throw (Exception(mes)); } return const_cast(it->second); } /******************************************************************************/ void MultiSeqIndividual::addSequence(const std::string& id, const Sequence& sequence) throw (Exception) { try { sequences_[id]->addSequence(sequence); } catch (AlphabetMismatchException& ame) { throw (AlphabetMismatchException("MultiSeqIndividual::addSequence: alphabets don't match.", ame.getAlphabets()[0], ame.getAlphabets()[1])); } catch (Exception& e) { throw (BadIdentifierException("MultiSeqIndividual::addSequence: sequence's name already in use.", sequence.getName())); } } /******************************************************************************/ const Sequence& MultiSeqIndividual::getSequence(const std::string& id, const std::string& name) const throw (Exception) { map::const_iterator it; it = sequences_.find(id); // Test existence of id in the map. if (it == sequences_.end()) { string mes = "MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."; throw (Exception(mes)); } try { return const_cast(it->second)->getSequence(name); } catch (SequenceNotFoundException& snfe) { throw (snfe); } } /******************************************************************************/ const Sequence& MultiSeqIndividual::getSequence(const std::string& id, size_t i) const throw (Exception) { map::const_iterator it; it = sequences_.find(id); // Test existence of id in the map. 
if (it == sequences_.end()) { string mes = "MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."; throw (Exception(mes)); } try { return const_cast(it->second)->getSequence(i); } catch (IndexOutOfBoundsException& ioobe) { throw (ioobe); } } /******************************************************************************/ std::vector MultiSeqIndividual::getSequencesKeys() const { vector keys; map::const_iterator it; for (it = sequences_.begin(); it != sequences_.end(); it++) { keys.push_back(it->first); } return keys; } /******************************************************************************/ bool MultiSeqIndividual::hasSequences() const { return sequences_.size() != 0; } /******************************************************************************/ size_t MultiSeqIndividual::getNumberOfSequenceSet() const { return sequences_.size(); } /******************************************************************************/ size_t MultiSeqIndividual::getNumberOfSequences(const std::string& id) const throw (Exception) { map::const_iterator it; it = sequences_.find(id); // Test existence of id in the map. 
if (it == sequences_.end()) { string mes = "MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."; throw (Exception(mes)); } return const_cast(it->second)->getNumberOfSequences(); } /******************************************************************************/ // MultilocusGenotype void MultiSeqIndividual::addGenotype(const MultilocusGenotype& genotype) { genotype_ = new MultilocusGenotype(genotype); } /******************************************************************************/ const MultilocusGenotype* MultiSeqIndividual::getGenotype() const throw (NullPointerException) { return genotype_; } /******************************************************************************/ bool MultiSeqIndividual::hasGenotype() const { return genotype_ != 0; } /******************************************************************************/ bpp-popgen-2.1.0/src/Bpp/PopGen/MonolocusGenotypeTools.cpp000644 000000 000000 00000005150 12147656633 023473 0ustar00rootroot000000 000000 // // File MonolocusGenotypeTools.cpp // Author: Sylvain Gaillard // Created on: April 4, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (April 4, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ // From Pop #include "MonoAlleleMonolocusGenotype.h" #include "BiAlleleMonolocusGenotype.h" #include "MultiAlleleMonolocusGenotype.h" #include "MonolocusGenotypeTools.h" using namespace bpp; using namespace std; std::auto_ptr MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey(const std::vector allele_keys) throw (Exception) { if (allele_keys.size() < 1) throw Exception("MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey: no key in allele_keys."); if (allele_keys.size() == 1) return auto_ptr(new MonoAlleleMonolocusGenotype(allele_keys)); if (allele_keys.size() == 2) return auto_ptr(new BiAlleleMonolocusGenotype(allele_keys)); // for all other cases (allele_keys.size() > 2) return auto_ptr(new MultiAlleleMonolocusGenotype(allele_keys)); } bpp-popgen-2.1.0/src/Bpp/PopGen/GeneMapperCsvExport.cpp000644 000000 000000 00000017442 12147656633 022671 0ustar00rootroot000000 000000 // // File: GeneMapperCsvExport.cpp // Author: Sylvain Gaillard // Created: April 2, 2008 // /* Copyright or © or Copr. Bio++ Development Team, (April 2, 2008) This software is a computer program whose purpose is to provide classes for population genetics analysis. 
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. 
*/ #include "GeneMapperCsvExport.h" using namespace bpp; using namespace std; const std::string GeneMapperCsvExport::SAMPLE_FILE_H = "Sample File"; const std::string GeneMapperCsvExport::SAMPLE_NAME_H = "Sample Name"; const std::string GeneMapperCsvExport::PANEL_H = "Panel"; const std::string GeneMapperCsvExport::MARKER_H = "Marker"; const std::string GeneMapperCsvExport::DYE_H = "Dye"; const std::string GeneMapperCsvExport::ALLELE_H = "Allele "; const std::string GeneMapperCsvExport::SIZE_H = "Size "; const std::string GeneMapperCsvExport::HEIGHT_H = "Height "; const std::string GeneMapperCsvExport::PEAK_AREA_H = "Peak Area "; const std::string GeneMapperCsvExport::DAC_H = "DAC"; const std::string GeneMapperCsvExport::AN_H = "AN"; GeneMapperCsvExport::GeneMapperCsvExport(bool ia) : IndependentAlleles_(ia) {} GeneMapperCsvExport::~GeneMapperCsvExport() {} void GeneMapperCsvExport::read(std::istream& is, DataSet& data_set) throw (Exception) { if (!is) throw IOException("GeneMapperCsvExport::read: fail to open stream."); /* * Feed a DataTable with the data */ DataTable* dtp = DataTable::read(is, "\t", true, -1); DataTable& dt = *dtp; /* * Fixe the individuals' name if there is duplicate in the file */ vector ind_names; vector markers; try { ind_names = dt.getColumn(SAMPLE_NAME_H); markers = dt.getColumn(MARKER_H); } catch (Exception& e) { throw e; } map indname_marker; for (size_t i = 0; i < dt.getNumberOfRows(); i++) { string test_lab = dt(i, SAMPLE_NAME_H) + dt(i, MARKER_H); if (indname_marker.find(test_lab) != indname_marker.end()) { string new_lab = dt(i, SAMPLE_NAME_H) + "_" + TextTools::toString(indname_marker[test_lab] + 1); dt (i, SAMPLE_NAME_H) = new_lab; } indname_marker[test_lab]++; } ind_names = dt.getColumn(SAMPLE_NAME_H); map ind_count = VectorTools::countValues(ind_names); ind_names = VectorTools::unique(ind_names); markers = VectorTools::unique(markers); size_t loc_nbr = markers.size(); /* * Loci number */ data_set.initAnalyzedLoci(loc_nbr); /* * 
Group of individuals */ data_set.addEmptyGroup(0); for (unsigned int i = 0; i < ind_names.size(); i++) { Individual ind(ind_names[i]); data_set.addIndividualToGroup(data_set.getGroupPosition(0), ind); } /* * Loci data */ AnalyzedLoci al(markers.size()); vector col_names = dt.getColumnNames(); // Finds columns containing allele data vector alleles_cols; for (unsigned int i = 0; i < col_names.size(); i++) { if (TextTools::startsWith(col_names[i], ALLELE_H)) alleles_cols.push_back(i); } // Set LocusInfo vector > alleles_pos; for (unsigned int i = 0; i < markers.size(); i++) { al.setLocusInfo(i, LocusInfo(markers[i], LocusInfo::UNKNOWN)); } std::map< std::string, std::set< std::string > > markerAlleles; for (unsigned int i = 0; i < dt.getNumberOfRows(); ++i) { for (unsigned int j = 0; j < alleles_cols.size(); ++j) { if (dt(i, alleles_cols[j]) != "") { markerAlleles[dt(i, MARKER_H)].insert(dt(i, alleles_cols[j])); } } } for (std::map< std::string, std::set< std::string > >::iterator itm = markerAlleles.begin(); itm != markerAlleles.end(); itm++) { std::set< std::string >& s = itm->second; for (std::set< std::string >::iterator its = s.begin(); its != s.end(); its++) { al.addAlleleInfoByLocusName(itm->first, BasicAlleleInfo(*its)); } } data_set.setAnalyzedLoci(al); /* * Individuals informations */ size_t ind_col_index = VectorTools::which(dt.getColumnNames(), SAMPLE_NAME_H); size_t mark_col_index = VectorTools::which(dt.getColumnNames(), MARKER_H); for (size_t i = 0; i < dt.getNumberOfRows(); i++) { vector alleles; for (size_t j = 0; j < alleles_cols.size(); j++) { if (!TextTools::isEmpty(dt(i, alleles_cols[j]))) { unsigned int num = (data_set.getLocusInfoByName(dt(i, mark_col_index))).getAlleleInfoKey(dt(i, alleles_cols[j])); alleles.push_back(num); } } alleles = VectorTools::unique(alleles); MultiAlleleMonolocusGenotype ma(alleles); if (!data_set.getIndividualByIdFromGroup(0, dt(i, ind_col_index))->hasGenotype()) data_set.initIndividualGenotypeInGroup(0, 
data_set.getIndividualPositionInGroup(0, dt(i, ind_col_index))); if (alleles.size()) data_set.setIndividualMonolocusGenotypeInGroup(0, data_set.getIndividualPositionInGroup(0, dt(i, ind_col_index)), data_set.getAnalyzedLoci()->getLocusInfoPosition(dt(i, mark_col_index)), ma); } delete dtp; } void GeneMapperCsvExport::read(const std::string& path, DataSet& data_set) throw (Exception) { AbstractIDataSet::read(path, data_set); } DataSet* GeneMapperCsvExport::read(std::istream& is) throw (Exception) { return AbstractIDataSet::read(is); } DataSet* GeneMapperCsvExport::read(const std::string& path) throw (Exception) { return AbstractIDataSet::read(path); } // --- GeneMapperCsvExport::Record --- GeneMapperCsvExport::Record::Record(const std::string& row) : sampleFile_(), sampleName_(), panel_(), markerName_(), dye_(), alleles_(), dac_(), an_(0.) { StringTokenizer st(row, "\t", true, false); /* if (st.numberOfRemainingTokens() != 7 + 4 * alleleNumber) { throw Exception("GeneMapperCsvExport::Record::Record: bad number of allele"); } */ size_t itemNum = st.numberOfRemainingTokens(); size_t alleleNum = (itemNum - 7) / 4; sampleFile_ = st.getToken(0); sampleName_ = st.getToken(1); panel_ = st.getToken(2); markerName_ = st.getToken(3); dye_ = st.getToken(4); dac_ = st.getToken(itemNum - 2); an_ = TextTools::toDouble(st.getToken(itemNum - 1)); for (unsigned int i = 0; i < alleleNum; ++i) { GeneMapperCsvExport::Allele al( st.getToken(5 + i), TextTools::toDouble(st.getToken(5 + alleleNum + i)), TextTools::to(st.getToken(5 + (2 * alleleNum) + i)), TextTools::toDouble(st.getToken(5 + (3 * alleleNum) + i)) ); alleles_.push_back(al); } } bpp-popgen-2.1.0/src/Bpp/PopGen/Group.h000644 000000 000000 00000043344 12147656633 017531 0ustar00rootroot000000 000000 // // File Group.h // Author : Sylvain Gaillard // Khalid Belkhir // Last modification : Thursday July 29 2004 // /* Copyright or © or Copr. 
Bio++ Development Team, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.

In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/

#ifndef _GROUP_H_
#define _GROUP_H_

// From STL
#include
#include
#include
#include

// From SeqLib
#include
#include
#include

// From local
#include "Individual.h"
#include "GeneralExceptions.h"

namespace bpp
{
/**
 * @brief The Group class.
 *
 * A Group is an ensemble of Individuals with some statistics like the average
 * allele number.
 *
 * @author Sylvain Gaillard
 */
class Group
{
protected:
  size_t id_;
  std::string name_;
  std::vector individuals_;

public:
  // Constructors and destructor :
  /**
   * @brief Build a new, empty Group.
   *
   * @param group_id The id of the new Group.
   */
  Group(size_t group_id);

  /**
   * @brief Copy constructor.
   *
   * If you need to use a copy constructor in a DataSet context, use the one
   * which specifies a new Group id.
   */
  Group(const Group& group);

  /**
   * @brief A duplication constructor with a new Group id.
   */
  Group(const Group& group, size_t group_id);

  /**
   * @brief Destroy a Group.
   */
  ~Group();

public:
  /**
   * @brief The assignment operator.
   */
  Group& operator=(const Group& group);

  /**
   * @brief Set the id of the Group.
   *
   * @param group_id The id of the Group as a size_t.
   */
  void setGroupId(size_t group_id);

  /**
   * @brief Get the name of the Group.
   *
   * @return The name of the Group as a string.
   */
  const std::string& getGroupName() const { return name_; }

  /**
   * @brief Set the name of the Group.
   *
   * @param group_name Name of the Group as a string.
   */
  void setGroupName(const std::string& group_name);

  /**
   * @brief Get the id of the Group.
   *
   * @return The id of the Group as a size_t.
   */
  size_t getGroupId() const { return id_; }

  /**
   * @brief Add an Individual.
   *
   * Add an Individual to the group.
   *
   * @param ind The Individual to add to the Group.
   * @throw BadIdentifierException if individual's identifier is already in use.
   */
  void addIndividual(const Individual& ind) throw (BadIdentifierException);

  /**
   * @brief Add an empty Individual to the Group.
   *
   * @param individual_id The id of the Individual to create.
   * @throw BadIdentifierException if individual_id is already in use.
   */
  void addEmptyIndividual(const std::string& individual_id) throw (BadIdentifierException);

  /**
   * @brief Get the number of Individuals in the Group.
   *
   * @return The number of Individuals.
   */
  size_t getNumberOfIndividuals() const;

  /**
   * @brief Get the maximum number of sequences.
   *
   * Give the value of the highest sequence key. This value is useful to
   * discover the missing sequence data for each individual.
   */
  size_t getMaxNumberOfSequences() const;

  /**
   * @brief Get the position of an Individual.
   *
   * @param individual_id The id of the Individual to find.
   * @throw IndividualNotFoundException if individual_id is not found.
   */
  size_t getIndividualPosition(const std::string& individual_id) const throw (IndividualNotFoundException);

  /**
   * @brief Get a reference to an Individual.
   *
   * @param individual_id The id of the Individual to find.
   *
   * @return A const reference to the Individual.
   * @throw IndividualNotFoundException if individual_id is not found.
   */
  const Individual& getIndividualById(const std::string& individual_id) const throw (IndividualNotFoundException);

  /**
   * @brief Get a reference to an Individual by its position.
   *
   * @param individual_position The position of the Individual in the group.
   *
   * @return A const reference to the Individual.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  const Individual& getIndividualAtPosition(size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Remove an Individual from the Group.
   *
   * @param individual_id The id of the Individual to remove.
   *
   * @return An std::auto_ptr to the removed Individual.
   * @throw IndividualNotFoundException if individual_id is not found.
   *
   * Search an Individual in the Group by checking the id and remove it
   * if it is found, then return a pointer to this Individual.
   */
  std::auto_ptr removeIndividualById(const std::string& individual_id) throw (IndividualNotFoundException);

  /**
   * @brief Remove an Individual from the Group.
   *
   * @param individual_position The position in the Group of the Individual to remove.
   *
   * @return An std::auto_ptr to the removed Individual.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   *
   * Remove the individual at the specified position and return a pointer
   * to this Individual.
   */
  std::auto_ptr removeIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Delete an Individual from the Group.
   *
   * @param individual_id The id of the Individual to delete.
   * @throw IndividualNotFoundException if individual_id is not found.
   *
   * Search an Individual in the Group by checking the id and delete it
   * if it is found, freeing the memory by calling the destructor of the
   * Individual.
   */
  void deleteIndividualById(const std::string& individual_id) throw (IndividualNotFoundException);

  /**
   * @brief Delete an Individual from the Group.
   *
   * @param individual_position The position in the Group of the Individual to delete.
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   *
   * Free the memory by calling the destructor of the Individual.
   */
  void deleteIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Clear the Group.
   *
   * Delete all the Individuals of the group.
   */
  void clear();

  // -- Dealing with Individuals -----------------------------

  /**
   * @brief Set the sex of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualSexAtPosition(size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the sex of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  unsigned short getIndividualSexAtPosition(size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Set the date of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualDateAtPosition(size_t individual_position, const Date& date) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the date of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the Individual has no date.
   */
  const Date& getIndividualDateAtPosition(size_t individual_position) const throw (Exception);

  /**
   * @brief Set the coordinates of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualCoordAtPosition(size_t individual_position, const Point2D& coord) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the coordinates of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no coordinate.
   */
  const Point2D& getIndividualCoordAtPosition(size_t individual_position) const throw (Exception);

  /**
   * @brief Set the locality of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualLocalityAtPosition(size_t individual_position, const Locality* locality) throw (IndexOutOfBoundsException);

  /**
   * @brief Get the locality of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no locality.
   */
  const Locality& getIndividualLocalityAtPosition(size_t individual_position) const throw (Exception);

  /**
   * @brief Add a sequence to an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
   * @throw BadIdentifierException if the sequence's name is already in use.
   * @throw BadIntegerException if sequence_position is already in use.
   */
  void addIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception);

  /**
   * @brief Get a sequence of an Individual, by name.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   * @throw SequenceNotFoundException if sequence_name is not found.
   */
  const Sequence& getIndividualSequenceByName(size_t individual_position, const std::string& sequence_name) const throw (Exception);

  /**
   * @brief Get a sequence of an Individual, by position.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   * @throw SequenceNotFoundException if sequence_position is not found.
   */
  const Sequence& getIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) const throw (Exception);

  /**
   * @brief Delete a sequence of an Individual, by name.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   * @throw SequenceNotFoundException if sequence_name is not found.
   */
  void deleteIndividualSequenceByName(size_t individual_position, const std::string& sequence_name) throw (Exception);

  /**
   * @brief Delete a sequence of an Individual, by position.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   * @throw SequenceNotFoundException if sequence_position is not found.
   */
  void deleteIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) throw (Exception);

  /**
   * @brief Tell if the Individual has some sequences.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  bool hasIndividualSequences(size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Get the sequences' names from an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   */
  std::vector getIndividualSequencesNames(size_t individual_position) const throw (Exception);

  /**
   * @brief Get the position of a sequence in an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   * @throw SequenceNotFoundException if sequence_name is not found.
   */
  size_t getIndividualSequencePosition(size_t individual_position, const std::string& sequence_name) const throw (Exception);

  /**
   * @brief Get the number of sequences in an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if there is no sequence container defined in the individual.
   */
  size_t getIndividualNumberOfSequences(size_t individual_position) const throw (Exception);

  /**
   * @brief Set all the sequences by copying a MapSequenceContainer.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualSequences(size_t individual_position, const MapSequenceContainer& msc) throw (IndexOutOfBoundsException);

  /**
   * @brief Set the genotype of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void setIndividualGenotype(size_t individual_position, const MultilocusGenotype& genotype) throw (IndexOutOfBoundsException);

  /**
   * @brief Initialize the genotype of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw BadIntegerException if loci_number < 1.
   * @throw Exception if the individual already has a genotype.
   */
  void initIndividualGenotype(size_t individual_position, size_t loci_number) throw (Exception);

  /**
   * @brief Delete the genotype of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  void deleteIndividualGenotype(size_t individual_position) throw (IndexOutOfBoundsException);

  /**
   * @brief Tell if an Individual has a genotype.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   */
  bool hasIndividualGenotype(size_t individual_position) const throw (IndexOutOfBoundsException);

  /**
   * @brief Set a MonolocusGenotype of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   */
  void setIndividualMonolocusGenotype(size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype of an Individual from allele keys.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw Exception if there is no key in allele_keys.
   */
  void setIndividualMonolocusGenotypeByAlleleKey(size_t individual_position, size_t locus_position, const std::vector& allele_keys) throw (Exception);

  /**
   * @brief Set a MonolocusGenotype of an Individual from allele ids.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   * @throw AlleleNotFoundException if at least one id is not found in locus_info.
   */
  void setIndividualMonolocusGenotypeByAlleleId(size_t individual_position, size_t locus_position, const std::vector& allele_id, const LocusInfo& locus_info) throw (Exception);

  /**
   * @brief Get a MonolocusGenotype of an Individual.
   *
   * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
   * @throw NullPointerException if the individual has no genotype.
   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
   */
  const MonolocusGenotype& getIndividualMonolocusGenotype(size_t individual_position, size_t locus_position) const throw (Exception);

  /**
   * @brief Tell if at least one individual has at least one sequence.
   */
  bool hasSequenceData() const;

  /**
   * @brief Get the alphabet used for the sequences.
   */
  const Alphabet* getAlphabet() const throw (NullPointerException);

  /**
   * @brief Get the number of individuals that have data at the specified locus.
   */
  size_t getGroupSizeForLocus(size_t locus_position) const;

  /**
   * @brief Get the number of individuals that have a sequence at the specified position.
   */
  size_t getGroupSizeForSequence(size_t sequence_position) const;
};
} // end of namespace bpp;

#endif // _GROUP_H_
bpp-popgen-2.1.0/src/Bpp/PopGen/Genepop.cpp000644 000000 000000 00000015167 12147656633 020367 0ustar00rootroot000000 000000
//
// File Genepop.cpp
// Author : Sylvain Gaillard
// Last modification : Tuesday September 21 2004
//

/*
Copyright or © or Copr. CNRS, (November 17, 2004)

This software is a computer program whose purpose is to provide classes
for population genetics analysis.

This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".

As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "Genepop.h" using namespace bpp; using namespace std; Genepop::Genepop() {} Genepop::~Genepop() {} void Genepop::read(istream& is, DataSet& data_set) throw (Exception) { if (!is) throw IOException("Genepop::read: fail to open stream."); // Skip first line FileTools::getNextLine(is); ios::pos_type entry_point = is.tellg(); bool eof_ok = false; bool loc_def_ok = false; bool loc_nbr_ok = false; size_t grp_nbr = 0; vector tmp_loc; vector > al_ids; map ind_id_count; map ind_id_index; string temp(""); // First read : file structure while (!eof_ok) { if (is.peek() == EOF && !eof_ok) { // If eof rewind to entry_point is.seekg(entry_point); eof_ok = true; } else { // Count everything temp = FileTools::getNextLine(is); string cp_temp = TextTools::removeSurroundingWhiteSpaces(temp); cp_temp = TextTools::toUpper(cp_temp); if (cp_temp == string("POP")) { loc_def_ok = true; grp_nbr++; data_set.addEmptyGroup(grp_nbr); } if (!loc_def_ok) { StringTokenizer st(temp, string(", "), true); while (st.hasMoreToken()) tmp_loc.push_back(LocusInfo(TextTools::removeSurroundingWhiteSpaces(st.nextToken()))); } if (loc_def_ok && !loc_nbr_ok) { al_ids.resize(tmp_loc.size()); loc_nbr_ok = true; } if 
(loc_def_ok) { string alleles; StringTokenizer st(temp, string(",")); if (st.numberOfRemainingTokens() == 2) { ind_id_count[TextTools::removeSurroundingWhiteSpaces(st.nextToken())]++; alleles = st.nextToken(); } StringTokenizer st2(alleles); if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size()) { size_t i = 0; while (st2.hasMoreToken()) { string ids = TextTools::removeSurroundingWhiteSpaces(st2.nextToken()); string tmp_id = string(ids.begin(), ids.begin() + (ids.size() / 2)); if (tmp_id != string("00") && tmp_id != string("000")) al_ids[i].insert(tmp_id); tmp_id = string(ids.begin() + (ids.size() / 2), ids.end()); if (tmp_id != string("00") && tmp_id != string("000")) al_ids[i].insert(tmp_id); i++; } } } } } // Set AnalyzedLoci data_set.initAnalyzedLoci(tmp_loc.size()); for (size_t i = 0; i < tmp_loc.size(); i++) { data_set.setLocusInfo(i, tmp_loc[i]); for (set::iterator it = al_ids[i].begin(); it != al_ids[i].end(); it++) { data_set.addAlleleInfoByLocusPosition(i, BasicAlleleInfo(*it)); } } // Second read : file data grp_nbr = 0; size_t grp_pos = 0; loc_def_ok = false; while (!is.eof()) { temp = FileTools::getNextLine(is); string cp_temp = TextTools::removeSurroundingWhiteSpaces(temp); cp_temp = TextTools::toUpper(cp_temp); if (cp_temp == string("POP")) { grp_nbr++; loc_def_ok = true; grp_pos = data_set.getGroupPosition(grp_nbr); } else { if (loc_def_ok) { string alleles; StringTokenizer st(temp, string(",")); size_t ind_pos = 0; if (st.numberOfRemainingTokens() == 2) { string ind_id = TextTools::removeSurroundingWhiteSpaces(st.nextToken()); if (ind_id_count[ind_id] > 1) ind_id = ind_id + string("_") + TextTools::toString(++ind_id_index[ind_id]); data_set.addEmptyIndividualToGroup(grp_pos, ind_id); ind_pos = data_set.getIndividualPositionInGroup(grp_pos, ind_id); data_set.initIndividualGenotypeInGroup(grp_pos, ind_pos); alleles = st.nextToken(); } StringTokenizer st2(alleles); if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size()) { size_t i = 0; 
while (st2.hasMoreToken()) { string ids = TextTools::removeSurroundingWhiteSpaces(st2.nextToken()); vector tmp_ids; tmp_ids.push_back(string(ids.begin(), ids.begin() + (ids.size() / 2))); tmp_ids.push_back(string(ids.begin() + (ids.size() / 2), ids.end())); if (tmp_ids[0] != string("00") && tmp_ids[0] != string("000") && tmp_ids[1] != string("00") && tmp_ids[1] != string("000")) { data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(grp_pos, ind_pos, i, tmp_ids); } i++; tmp_ids.clear(); } } } } } } void Genepop::read(const string& path, DataSet& data_set) throw (Exception) { AbstractIDataSet::read(path, data_set); } DataSet* Genepop::read(istream& is) throw (Exception) { return AbstractIDataSet::read(is); } DataSet* Genepop::read(const string& path) throw (Exception) { return AbstractIDataSet::read(path); } bpp-popgen-2.1.0/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp000644 000000 000000 00000035767 12147656633 025040 0ustar00rootroot000000 000000 // // File: PolymorphismSequenceContainer.h // Created by: Eric Bazin // Sylvain Gaillard // Created on: Wednesday August 04 2004 // /* Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) This software is a computer program whose purpose is to provide classes for population genetics analysis. This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. 
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms. */ #include "PolymorphismSequenceContainer.h" using namespace bpp; using namespace std; /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const Alphabet* alpha) : VectorSiteContainer(alpha), ingroup_(vector()), count_(vector()), group_(vector()) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(size_t size, const Alphabet* alpha) : VectorSiteContainer(size, alpha), ingroup_(vector(size)), count_(vector(size)), group_(vector(size)) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const OrderedSequenceContainer& sc) : VectorSiteContainer(sc), ingroup_(vector(sc.getNumberOfSequences(), true)), count_(vector(sc.getNumberOfSequences(), 1)), group_(vector(sc.getNumberOfSequences(), 1)) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const SiteContainer& sc) : VectorSiteContainer(sc), 
ingroup_(vector(sc.getNumberOfSequences(), true)), count_(vector(sc.getNumberOfSequences(), 1)), group_(vector(sc.getNumberOfSequences(), 1)) {} /******************************************************************************/ PolymorphismSequenceContainer::PolymorphismSequenceContainer(const PolymorphismSequenceContainer& psc) : VectorSiteContainer(psc), ingroup_(vector(psc.getNumberOfSequences())), count_(vector(psc.getNumberOfSequences())), group_(vector(psc.getNumberOfSequences())) { for (size_t i = 0; i < psc.getNumberOfSequences(); i++) { count_[i] = psc.getSequenceCount(i); ingroup_[i] = psc.isIngroupMember(i); group_[i] = psc.getGroupId(i); } } /******************************************************************************/ PolymorphismSequenceContainer& PolymorphismSequenceContainer::operator=(const PolymorphismSequenceContainer& psc) { VectorSiteContainer::operator=(psc); // Setting up the sequences comments, numbers and ingroup state size_t nbSeq = psc.getNumberOfSequences(); count_.resize(nbSeq); ingroup_.resize(nbSeq); group_.resize(nbSeq); for (size_t i = 0; i < nbSeq; i++) { count_[i] = psc.getSequenceCount(i); ingroup_[i] = psc.isIngroupMember(i); group_[i] = psc.getGroupId(i); } return *this; } /******************************************************************************/ // ** Class destructor: *******************************************************/ PolymorphismSequenceContainer::~PolymorphismSequenceContainer() { clear(); } /******************************************************************************/ // ** Other methodes: *********************************************************/ Sequence* PolymorphismSequenceContainer::removeSequence(size_t index) throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::removeSequence: index out of bounds.", index, 0, getNumberOfSequences()); count_.erase(count_.begin() + index); ingroup_.erase(ingroup_.begin() + index); 
group_.erase(group_.begin() + index); return VectorSiteContainer::removeSequence(index); } /******************************************************************************/ Sequence* PolymorphismSequenceContainer::removeSequence(const std::string& name) throw (SequenceNotFoundException) { try { return removeSequence(getSequencePosition(name)); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::removeSequence.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::deleteSequence(size_t index) throw (IndexOutOfBoundsException) { try { delete removeSequence(index); } catch (IndexOutOfBoundsException& ioobe) { throw IndexOutOfBoundsException("PolymorphismSequenceContainer::deleteSequence.", index, 0, getNumberOfSequences()); } } /******************************************************************************/ void PolymorphismSequenceContainer::deleteSequence(const std::string& name) throw (SequenceNotFoundException) { try { delete removeSequence(name); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::deleteSequence.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::addSequence(const Sequence& sequence, size_t effectif, bool checkNames) throw (Exception) { try { VectorSiteContainer::addSequence(sequence, checkNames); } catch (Exception& e) { throw e; } count_.push_back(effectif); ingroup_.push_back(true); group_.push_back(0); } /******************************************************************************/ void PolymorphismSequenceContainer::clear() { VectorSiteContainer::clear(); count_.clear(); ingroup_.clear(); group_.clear(); } /******************************************************************************/ size_t PolymorphismSequenceContainer::getGroupId(size_t index) const throw 
(IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getGroupId: index out of bounds.", index, 0, getNumberOfSequences()); return group_[index]; } /******************************************************************************/ size_t PolymorphismSequenceContainer::getGroupId(const std::string& name) const throw (SequenceNotFoundException) { try { return group_[getSequencePosition(name)]; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::getGroupId.", name); } } /******************************************************************************/ std::set PolymorphismSequenceContainer::getAllGroupsIds() const { set grp_ids; for (size_t i = 0; i < group_.size(); i++) { grp_ids.insert(group_[i]); } return grp_ids; } /******************************************************************************/ void PolymorphismSequenceContainer::setGroupId(size_t index, size_t group_id) throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setGroupId: index out of bounds.", index, 0, getNumberOfSequences()); group_[index] = group_id; } /******************************************************************************/ void PolymorphismSequenceContainer::setGroupId(const std::string& name, size_t group_id) throw (SequenceNotFoundException) { try { group_[getSequencePosition(name)] = group_id; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::setGroupId.", name); } } /******************************************************************************/ size_t PolymorphismSequenceContainer::getNumberOfGroups() const { return getAllGroupsIds().size(); } /******************************************************************************/ bool PolymorphismSequenceContainer::isIngroupMember(size_t index) const throw 
(IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::isIngroupMember: index out of bounds.", index, 0, getNumberOfSequences()); return ingroup_[index]; } /******************************************************************************/ bool PolymorphismSequenceContainer::isIngroupMember(const std::string& name) const throw (SequenceNotFoundException) { try { return ingroup_[getSequencePosition(name)]; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::isIngroupMember.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::setAsIngroupMember(size_t index) throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsIngroupMember.", index, 0, getNumberOfSequences()); ingroup_[index] = true; } /******************************************************************************/ void PolymorphismSequenceContainer::setAsIngroupMember(const std::string& name) throw (SequenceNotFoundException) { try { size_t seqPos = getSequencePosition(name); ingroup_[seqPos] = true; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsIngroupMember.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::setAsOutgroupMember(size_t index) throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsOutgroupMember.", index, 0, getNumberOfSequences()); ingroup_[index] = false; } /******************************************************************************/ void PolymorphismSequenceContainer::setAsOutgroupMember(const std::string& name) throw (SequenceNotFoundException) { try { size_t seqPos = 
getSequencePosition(name); ingroup_[seqPos] = false; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsOutgroupMember.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::setSequenceCount(size_t index, size_t count) throw (Exception) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setSequenceCount.", index, 0, getNumberOfSequences()); if (count < 1) throw BadIntegerException("PolymorphismSequenceContainer::setSequenceCount: count can't be < 1.", static_cast(count)); count_[index] = count; } /******************************************************************************/ void PolymorphismSequenceContainer::setSequenceCount(const std::string& name, size_t count) throw (Exception) { try { setSequenceCount(getSequencePosition(name), count); } catch (BadIntegerException& bie) { throw bie; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::setSequenceCount.", name); } } /******************************************************************************/ void PolymorphismSequenceContainer::incrementSequenceCount(size_t index) throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::incrementSequenceCount.", index, 0, getNumberOfSequences()); count_[index]++; } /******************************************************************************/ void PolymorphismSequenceContainer::incrementSequenceCount(const std::string& name) throw (SequenceNotFoundException) { try { incrementSequenceCount(getSequencePosition(name)); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::incrementSequenceCount.", name); } } /******************************************************************************/ void 
PolymorphismSequenceContainer::decrementSequenceCount(size_t index) throw (Exception) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::decrementSequenceCount.", index, 0, getNumberOfSequences()); if (count_[index] - 1 < 1) throw BadIntegerException("PolymorphismSequenceContainer::decrementSequenceCount: count can't be < 1.", static_cast(count_[index] - 1)); count_[index]--; } /******************************************************************************/ void PolymorphismSequenceContainer::decrementSequenceCount(const std::string& name) throw (Exception) { try { decrementSequenceCount(getSequencePosition(name)); } catch (BadIntegerException& bie) { throw bie; } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::decrementSequenceCount.", name); } } /******************************************************************************/ size_t PolymorphismSequenceContainer::getSequenceCount(size_t index) const throw (IndexOutOfBoundsException) { if (index >= getNumberOfSequences()) throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getSequenceCount.", index, 0, getNumberOfSequences()); return count_[index]; } /******************************************************************************/ size_t PolymorphismSequenceContainer::getSequenceCount(const std::string& name) const throw (SequenceNotFoundException) { try { return getSequenceCount(getSequencePosition(name)); } catch (SequenceNotFoundException& snfe) { throw SequenceNotFoundException("PolymorphismSequenceContainer::getSequenceCount.", name); } } /******************************************************************************/